Hi Ian , Thanks for the review , replies are inline.
<snipped> > > This allow for more performance and flexibility to the user to choose > > Minor typo above, allow -> allows. > Included in v5 > > the miniflow implementation according to the needs. > > > > Signed-off-by: Kumar Amber <kumar.am...@intel.com> > > Co-authored-by: Harry van Haaren <harry.van.haa...@intel.com> > > Signed-off-by: Harry van Haaren <harry.van.haa...@intel.com> > > --- > > lib/automake.mk | 2 + > > lib/dpif-netdev-avx512.c | 32 ++++++-- > > lib/dpif-netdev-private-extract.c | 86 ++++++++++++++++++++ > > lib/dpif-netdev-private-extract.h | 94 ++++++++++++++++++++++ > > lib/dpif-netdev-private-thread.h | 4 + > > lib/dpif-netdev.c | 126 +++++++++++++++++++++++++++++- > > 6 files changed, 337 insertions(+), 7 deletions(-) create mode > > 100644 lib/dpif-netdev-private-extract.c create mode 100644 > > lib/dpif-netdev-private-extract.h > > > > diff --git a/lib/automake.mk b/lib/automake.mk index > > 49f42c2a3..6657b9ae5 100644 > > --- a/lib/automake.mk > > +++ b/lib/automake.mk > > @@ -118,6 +118,8 @@ lib_libopenvswitch_la_SOURCES = \ > > lib/dpif-netdev-private-dpcls.h \ lib/dpif-netdev-private-dpif.c \ > > lib/dpif-netdev-private-dpif.h \ > > +lib/dpif-netdev-private-extract.c \ > > +lib/dpif-netdev-private-extract.h \ > > lib/dpif-netdev-private-flow.h \ > > lib/dpif-netdev-private-hwol.h \ > > lib/dpif-netdev-private-thread.h \ > > diff --git a/lib/dpif-netdev-avx512.c b/lib/dpif-netdev-avx512.c index > > f9b199637..bb99b23ff 100644 > > --- a/lib/dpif-netdev-avx512.c > > +++ b/lib/dpif-netdev-avx512.c > > @@ -148,6 +148,15 @@ dp_netdev_input_outer_avx512(struct > > dp_netdev_pmd_thread *pmd, > > * // do all processing (HWOL->MFEX->EMC->SMC) > > * } > > */ > > + > > + /* Do a batch minfilow extract into keys. */ > > + uint32_t mf_mask = 0; > > + if (pmd->miniflow_extract_opt) { > > + mf_mask = pmd->miniflow_extract_opt(packets, keys, > > + batch_size, in_port, > > + (void *) pmd); > > + } > > + /* Perform first packet interation */ > Would add whitespace above to separate the comment form the conditional > block. Also minor nit, missing period at end of comment. > Included in v5 > > uint32_t lookup_pkts_bitmask = (1ULL << batch_size) - 1; > > uint32_t iter = lookup_pkts_bitmask; > > while (iter) { > > @@ -159,6 +168,12 @@ dp_netdev_input_outer_avx512(struct > > dp_netdev_pmd_thread *pmd, > > pkt_metadata_init(&packet->md, in_port); > > > > struct dp_netdev_flow *f = NULL; > > + struct netdev_flow_key *key = &keys[i]; > > + > > + /* Check the minfiflow mask to see if the packet was correctly > > + * classifed by vector mfex else do a scalar miniflow extract > > + * for that packet. */ > Typo above for miniflow. Also alignment of * in comment seems out of > place. > Should be vertically aligned. Would also suggest finishing with */ on > separate line after the comment. > Included in v5 > > + uint32_t mfex_hit = (mf_mask & (1 << i)); > > > > /* Check for partial hardware offload mark. */ > > uint32_t mark; > > @@ -166,7 +181,13 @@ dp_netdev_input_outer_avx512(struct > > dp_netdev_pmd_thread *pmd, > > f = mark_to_flow_find(pmd, mark); > > if (f) { > > rules[i] = &f->cr; > > - pkt_meta[i].tcp_flags = parse_tcp_flags(packet); > > + /* If AVX512 MFEX already classified the packet, use it. */ > > + if (mfex_hit) { > > + pkt_meta[i].tcp_flags = > > miniflow_get_tcp_flags(&key->mf); > > + } else { > > + pkt_meta[i].tcp_flags = parse_tcp_flags(packet); > > + } > > + > > pkt_meta[i].bytes = dp_packet_size(packet); > > phwol_hits++; > > hwol_emc_smc_hitmask |= (1 << i); @@ -174,11 +195,12 > > @@ dp_netdev_input_outer_avx512(struct > > dp_netdev_pmd_thread *pmd, > > } > > } > > > > - /* Do miniflow extract into keys. */ > > - struct netdev_flow_key *key = &keys[i]; > > - miniflow_extract(packet, &key->mf); > > + if (!mfex_hit) { > > + /* Do a scalar miniflow extract into keys */ > Minor, missing period in comment. > Included in v5 > > + miniflow_extract(packet, &key->mf); > > + } > > > > - /* Cache TCP and byte values for all packets. */ > > + /* Cache TCP and byte values for all packets */ > Period removed from comment, should be put back. > Included in v5 > > pkt_meta[i].bytes = dp_packet_size(packet); > > pkt_meta[i].tcp_flags = miniflow_get_tcp_flags(&key->mf); > > > > diff --git a/lib/dpif-netdev-private-extract.c > > b/lib/dpif-netdev-private-extract.c > > new file mode 100644 > > index 000000000..fcc56ef26 > > --- /dev/null > > +++ b/lib/dpif-netdev-private-extract.c > > @@ -0,0 +1,86 @@ > > +/* > > + * Copyright (c) 2021 Intel. > > + * > > + * Licensed under the Apache License, Version 2.0 (the "License"); > > + * you may not use this file except in compliance with the License. > > + * You may obtain a copy of the License at: > > + * > > + * http://www.apache.org/licenses/LICENSE-2.0 > > + * > > + * Unless required by applicable law or agreed to in writing, > > +software > > + * distributed under the License is distributed on an "AS IS" BASIS, > > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or > > implied. > > + * See the License for the specific language governing permissions > > + and > > + * limitations under the License. > > + */ > > + > > +#include <config.h> > > +#include <errno.h> > > +#include <stdint.h> > > +#include <string.h> > > + > > +#include "dp-packet.h" > > +#include "dpif-netdev-private-dpcls.h" > > +#include "dpif-netdev-private-extract.h" > > +#include "dpif-netdev-private-thread.h" > > +#include "flow.h" > > +#include "openvswitch/vlog.h" > > +#include "ovs-thread.h" > > +#include "util.h" > > + > > +VLOG_DEFINE_THIS_MODULE(dpif_netdev_extract); > > + > > +/* Implementations of available extract options. */ static struct > > +dpif_miniflow_extract_impl mfex_impls[] = { > > + { > > + .probe = NULL, > > + .extract_func = NULL, > > + .name = "disable", > > + }, > > +}; > > + > > +BUILD_ASSERT_DECL(MFEX_IMPLS_MAX_SIZE > ARRAY_SIZE(mfex_impls)); > > + > > +int32_t > > +dpif_miniflow_extract_opt_get(const char *name, > > + struct dpif_miniflow_extract_impl > > +**opt) { > > + ovs_assert(opt); > > + ovs_assert(name); > > + > > + uint32_t i; > > + for (i = 0; i < ARRAY_SIZE(mfex_impls); i++) { > > + if (strcmp(name, mfex_impls[i].name) == 0) { > > + *opt = &mfex_impls[i]; > > + return 0; > > + } > > + } > > + return -ENOTSUP; > > +} > > + > > +void > > +dpif_miniflow_extract_init(void) > > +{ > > + /* Call probe on each impl, and cache the result. */ > > + uint32_t i; > > + for (i = 0; i < ARRAY_SIZE(mfex_impls); i++) { > > + int avail = 1; > > + if (mfex_impls[i].probe) { > > + /* Return zero is success, non-zero means error. */ > > + avail = (mfex_impls[i].probe() == 0); > > + } > > + VLOG_INFO("Miniflow Extract implementation %s (available: %s)\n", > > + mfex_impls[i].name, avail ? "True" : "False"); > > + mfex_impls[i].available = avail; > > + } > > +} > > + > > +int32_t > > +dpif_miniflow_extract_info_get(struct dpif_miniflow_extract_impl > > +**out_ptr) { > > + if (out_ptr == NULL) { > > + return -EINVAL; > > + } > > + *out_ptr = mfex_impls; > > + return ARRAY_SIZE(mfex_impls); > > +} > > diff --git a/lib/dpif-netdev-private-extract.h > > b/lib/dpif-netdev-private-extract.h > > new file mode 100644 > > index 000000000..b7b0b2be4 > > --- /dev/null > > +++ b/lib/dpif-netdev-private-extract.h > > @@ -0,0 +1,94 @@ > > +/* > > + * Copyright (c) 2021 Intel. > > + * > > + * Licensed under the Apache License, Version 2.0 (the "License"); > > + * you may not use this file except in compliance with the License. > > + * You may obtain a copy of the License at: > > + * > > + * http://www.apache.org/licenses/LICENSE-2.0 > > + * > > + * Unless required by applicable law or agreed to in writing, > > +software > > + * distributed under the License is distributed on an "AS IS" BASIS, > > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or > > implied. > > + * See the License for the specific language governing permissions > > + and > > + * limitations under the License. > > + */ > > + > > +#ifndef DPIF_NETDEV_AVX512_EXTRACT > > +#define DPIF_NETDEV_AVX512_EXTRACT 1 > > + > > +#include <sys/types.h> > > + > > +#include "openvswitch/types.h" > > + > > +/* Max size of dpif_miniflow_extract_impl array. */ #define > > +MFEX_IMPLS_MAX_SIZE (16) > > + > > +/* Forward declarations. */ > > +struct dp_packet; > > +struct miniflow; > > +struct dp_netdev_pmd_thread; > > +struct dp_packet_batch; > > +struct netdev_flow_key; > > + > > +/* Function pointer prototype to be implemented in the optimized > > +miniflow > > + * extract code. > > + * returns the hitmask of the processed packets on success. > > + * returns zero on failure. > > + */ > > +typedef uint32_t (*miniflow_extract_func)(struct dp_packet_batch > *batch, > > + struct netdev_flow_key *keys, > > + uint32_t keys_size, > > + odp_port_t in_port, > > + void *pmd_handle); > > + > > +/* Probe function is used to detect if this CPU has the ISA required > > + * to run the optimized miniflow implementation. > > + * returns one on successful probe. > > + * returns zero on failure. > > + */ > > +typedef int32_t (*miniflow_extract_probe)(void); > > + > > +/* Structure representing the attributes of an optimized > > +implementation. */ struct dpif_miniflow_extract_impl { > > + /* When non-zero, this impl has passed the probe() checks. */ > > + uint8_t available; > > + > > + /* Probe function is used to detect if this CPU has the ISA required > > + * to run the optimized miniflow implementation. > > + */ > > + miniflow_extract_probe probe; > > + > > + /* Function to call to extract miniflows for a burst of packets. */ > > + miniflow_extract_func extract_func; > > + > > + /* Name of the optimized implementation. */ > > + char *name; > > +}; > > + > > +/* Retrieve the opt structure for the requested implementation by name. > > + * Returns zero on success, and opt points to a valid struct, or > > + * returns a negative failure status. > > + * -ENOTSUP : invalid name requested > > + */ > > +int32_t > > +dpif_miniflow_extract_opt_get(const char *name, > > + struct dpif_miniflow_extract_impl > > +**opt); > > + > > +/* Initializes the available miniflow extract implementations by > > +probing for > > + * the CPU ISA requirements. As the runtime available CPU ISA does > > +not change > > + * and the required ISA of the implementation also does not change, > > +it is safe > > + * to cache the probe() results, and not call probe() at runtime. > > + */ > > +void > > +dpif_miniflow_extract_init(void); > > + > > +/* Retrieve the array of miniflow implementations for iteration. > > + * On error, returns a negative number. > > + * On success, returns the size of the arrays pointed to by the out > parameter. > > + */ > > +int32_t > > +dpif_miniflow_extract_info_get(struct dpif_miniflow_extract_impl > > +**out_ptr); > > + > > + > > +#endif /* DPIF_NETDEV_AVX512_EXTRACT */ > > diff --git a/lib/dpif-netdev-private-thread.h > > b/lib/dpif-netdev-private-thread.h > > index f89b1ddaa..119eb7396 100644 > > --- a/lib/dpif-netdev-private-thread.h > > +++ b/lib/dpif-netdev-private-thread.h > > @@ -28,6 +28,7 @@ > > #include "dpif-netdev-private-dpif.h" > > #include "dpif-netdev-perf.h" > > #include "openvswitch/thread.h" > > +#include "dpif-netdev-private-extract.h" > Should be placed above #include "openvswitch/thread.h" > Included in v5 > > > > #ifdef __cplusplus > > extern "C" { > > @@ -110,6 +111,9 @@ struct dp_netdev_pmd_thread { > > /* Pointer for per-DPIF implementation scratch space. */ > > void *netdev_input_func_userdata; > > > > + /* Function pointer to call for miniflow_extract() functionality. */ > > + miniflow_extract_func miniflow_extract_opt; > > + > > struct seq *reload_seq; > > uint64_t last_reload_seq; > > > > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index > > f316835a4..567ebd952 100644 > > --- a/lib/dpif-netdev.c > > +++ b/lib/dpif-netdev.c > > @@ -46,6 +46,7 @@ > > #include "dpif.h" > > #include "dpif-netdev-lookup.h" > > #include "dpif-netdev-perf.h" > > +#include "dpif-netdev-private-extract.h" > > #include "dpif-provider.h" > > #include "dummy.h" > > #include "fat-rwlock.h" > > @@ -1089,6 +1090,102 @@ dpif_netdev_impl_set(struct unixctl_conn > > *conn, int argc, > > ds_destroy(&reply); > > } > > > > +static void > > +dpif_miniflow_extract_impl_get(struct unixctl_conn *conn, int argc > > OVS_UNUSED, > > + const char *argv[] OVS_UNUSED, void *aux > > +OVS_UNUSED) { > > + struct dpif_miniflow_extract_impl *mfex_impls; > > + uint32_t count = dpif_miniflow_extract_info_get(&mfex_impls); > > + if (count == 0) { > > + unixctl_command_reply_error(conn, "error getting mfex names"); > > + return; > > + } > > + > > + /* Add all mfex functions to reply string. */ > > + struct ds reply = DS_EMPTY_INITIALIZER; > > + ds_put_cstr(&reply, "Available Optimized Miniflow Extracts:\n"); > > + for (uint32_t i = 0; i < count; i++) { > > + ds_put_format(&reply, " %s (available: %s)\n", > > + mfex_impls[i].name, mfex_impls[i].available ? > > + "True" : "False"); > > + } > > + unixctl_command_reply(conn, ds_cstr(&reply)); > > + ds_destroy(&reply); > > +} > > + > > +static void > > +dpif_miniflow_extract_impl_set(struct unixctl_conn *conn, int argc, > > + const char *argv[], void *aux OVS_UNUSED) { > > + /* This function requires just one parameter, the miniflow name. > > + * A second optional parameter can identify the datapath instance. > > + */ > > + const char *mfex_name = argv[1]; > > + > > + static const char *error_description[2] = { > > + "Unknown miniflow implementation", > > + "implementation doesn't exist", > > + }; > > + struct dpif_miniflow_extract_impl *opt; > > + miniflow_extract_func new_func; > I would insert white space here to make the code more readable. > Included in v5 > > + int32_t err = dpif_miniflow_extract_opt_get(mfex_name, &opt); > > + if (err) { > > + struct ds reply = DS_EMPTY_INITIALIZER; > > + ds_put_format(&reply, > > + "Miniflow implementation not available: %s %s.\n", > > + error_description[ (err == -ENOTSUP) ], mfex_name); > > + const char *reply_str = ds_cstr(&reply); > > + unixctl_command_reply(conn, reply_str); > > + VLOG_INFO("%s", reply_str); > > + ds_destroy(&reply); > > + return; > > + } > > + new_func = opt->extract_func; > > Again white space here to separate the argv[1] and argv[2] code blocks. > Included in v5 > > + /* argv[2] is optional datapath instance. If no datapath name is > provided. > > + * and only one datapath exists, the one existing datapath is reprobed. > > + */ > > + ovs_mutex_lock(&dp_netdev_mutex); > > + struct dp_netdev *dp = NULL; > > + > > + if (argc == 3) { > > + dp = shash_find_data(&dp_netdevs, argv[2]); > > + } else if (shash_count(&dp_netdevs) == 1) { > > + dp = shash_first(&dp_netdevs)->data; > > + } > > + > > + if (!dp) { > > + ovs_mutex_unlock(&dp_netdev_mutex); > > + unixctl_command_reply_error(conn, > > + "please specify an existing datapath"); > > + return; > > + } > > + > > + /* Get PMD threads list. */ > > + size_t n; > > + struct dp_netdev_pmd_thread **pmd_list; > > + sorted_poll_thread_list(dp, &pmd_list, &n); > > + > > + for (size_t i = 0; i < n; i++) { > > + struct dp_netdev_pmd_thread *pmd = pmd_list[i]; > > + if (pmd->core_id == NON_PMD_CORE_ID) { > > + continue; > > + } > > + > > + /* Set PMD threads miniflow implementation to requested one. */ > > + pmd->miniflow_extract_opt = *new_func; > > + }; > > + > > + ovs_mutex_unlock(&dp_netdev_mutex); > > + > > + /* Reply with success to command. */ > > + struct ds reply = DS_EMPTY_INITIALIZER; > > + ds_put_format(&reply, "Miniflow implementation set to %s.\n", > > mfex_name); > > + const char *reply_str = ds_cstr(&reply); > > + unixctl_command_reply(conn, reply_str); > > + VLOG_INFO("%s", reply_str); > > + ds_destroy(&reply); > > +} > > + > > static void > > dpif_netdev_pmd_rebalance(struct unixctl_conn *conn, int argc, > > const char *argv[], void *aux OVS_UNUSED) > > @@ -1315,9 +1412,16 @@ dpif_netdev_init(void) > > "dpif_implementation_name [dp]", > > 1, 2, dpif_netdev_impl_set, > > NULL); > > + unixctl_command_register("dpif-netdev/miniflow-parser-set", > > + "miniflow implementation name [dp]", > > + 1, 2, dpif_miniflow_extract_impl_set, > > + NULL); > > unixctl_command_register("dpif-netdev/dpif-get", "", > > 0, 0, dpif_netdev_impl_get, > > NULL); > > + unixctl_command_register("dpif-netdev/miniflow-parser-get", "", > > + 0, 0, dpif_miniflow_extract_impl_get, > > + NULL); > > I would prefer not to split the get and set functions above. Try to keep dpif- > netdev/miniflow-parser-set And dpif-netdev/miniflow-parser-get together. > Included in v5. > > return 0; > > } > > > > @@ -1461,6 +1565,8 @@ create_dp_netdev(const char *name, const > struct > > dpif_class *class, > > > > dp->conntrack = conntrack_init(); > > > > + dpif_miniflow_extract_init(); > > + > > atomic_init(&dp->emc_insert_min, DEFAULT_EM_FLOW_INSERT_MIN); > > atomic_init(&dp->tx_flush_interval, DEFAULT_TX_FLUSH_INTERVAL); > > > > @@ -6176,6 +6282,9 @@ dp_netdev_configure_pmd(struct > > dp_netdev_pmd_thread *pmd, struct dp_netdev *dp, > > /* Initialize DPIF function pointer to the default configured version. > > */ > > pmd->netdev_input_func = dp_netdev_impl_get_default(); > > > > + /*Init default miniflow_extract function */ > > + pmd->miniflow_extract_opt = NULL; > > + > > /* init the 'flow_cache' since there is no > > * actual thread created for NON_PMD_CORE_ID. */ > > if (core_id == NON_PMD_CORE_ID) { @@ -6730,10 +6839,12 @@ > > dfc_processing(struct dp_netdev_pmd_thread *pmd, { > > struct netdev_flow_key *key = &keys[0]; > > size_t n_missed = 0, n_emc_hit = 0, n_phwol_hit = 0; > > + struct dp_packet_batch single_packet; > > struct dfc_cache *cache = &pmd->flow_cache; > > struct dp_packet *packet; > > const size_t cnt = dp_packet_batch_size(packets_); > > uint32_t cur_min = pmd->ctx.emc_insert_min; > > + int mf_ret; > > int i; > > uint16_t tcp_flags; > > bool smc_enable_db; > > @@ -6786,8 +6897,19 @@ dfc_processing(struct dp_netdev_pmd_thread > > *pmd, > > continue; > > } > > } > > - > > - miniflow_extract(packet, &key->mf); > Re-add removed white space here. Included in v5 > > + /* Set the count and packet for miniflow_opt with batch_size 1. */ > > + if ((pmd->miniflow_extract_opt) && (!md_is_valid)) { > Why would the metadata be invalid at stage if the condition above? There > is an earlier check in dfc processing above this block to check the state of > md_is_valid and if it's not then initialize it with pkt_metadata_init. > The check is kept here for the Inner circulation of packet and we currently handle only outer and not inner. > > + single_packet.count = 1; > > + single_packet.packets[0] = packet; > Can you provide more detail on the single packet count approach here? It > not clear on first reading the need for the different logic between standard > miniflow_extact and the optional case. The optimized miniflow_extract_func() processes a batch of packets. In the scalar DPIF, miniflow-extract executes one-by-one, and there is no batch to process. Building a batch of a single packet, and processing that "one packet batch" allows reuse of the same API. > > + mf_ret = pmd->miniflow_extract_opt(&single_packet, key, 1, > > + port_no, (void *) pmd); > > + /* Fallback to original miniflow_extract if there is a miss. */ > > + if (!mf_ret) { > > + miniflow_extract(packet, &key->mf); > > + } > > + } else { > > + miniflow_extract(packet, &key->mf); > > + } > Would add white space here also. > Included in v5. > > Br > Ian. > > key->len = 0; /* Not computed yet. */ > > key->hash = > > (md_is_valid == false) > > -- > > 2.25.1 > > > > _______________________________________________ > > dev mailing list > > d...@openvswitch.org > > https://mail.openvswitch.org/mailman/listinfo/ovs-dev _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev