See some comments below… For this patch series, I’m only looking at the diff from v6..v9, not a full review. I will do basic compilation and some tests at the end.
Cheers, Eelco On 12 Jul 2021, at 7:51, kumar Amber wrote: > From: Kumar Amber <kumar.am...@intel.com> > > This patch introduces the MFEX function pointers which allows > the user to switch between different miniflow extract implementations > which are provided by the OVS based on optimized ISA CPU. > > The user can query for the available minflow extract variants available > for that CPU by following commands: > > $ovs-appctl dpif-netdev/miniflow-parser-get > > Similarly an user can set the miniflow implementation by the following > command : > > $ ovs-appctl dpif-netdev/miniflow-parser-set name > > This allows for more performance and flexibility to the user to choose > the miniflow implementation according to the needs. > > Signed-off-by: Kumar Amber <kumar.am...@intel.com> > Co-authored-by: Harry van Haaren <harry.van.haa...@intel.com> > Signed-off-by: Harry van Haaren <harry.van.haa...@intel.com> > > --- > v9: > - fix review comments from Flavio > v7: > - fix review comments(Eelco, Flavio) > v5: > - fix review comments(Ian, Flavio, Eelco) > - add enum to hold mfex indexes > - add new get and set implemenatations > - add Atomic set and get > --- > --- > NEWS | 1 + > lib/automake.mk | 2 + > lib/dpif-netdev-avx512.c | 31 +++++- > lib/dpif-netdev-private-extract.c | 162 ++++++++++++++++++++++++++++++ > lib/dpif-netdev-private-extract.h | 111 ++++++++++++++++++++ > lib/dpif-netdev-private-thread.h | 8 ++ > lib/dpif-netdev.c | 105 +++++++++++++++++++ > 7 files changed, 416 insertions(+), 4 deletions(-) > create mode 100644 lib/dpif-netdev-private-extract.c > create mode 100644 lib/dpif-netdev-private-extract.h > > diff --git a/NEWS b/NEWS > index 6cdccc715..b0f08e96d 100644 > --- a/NEWS > +++ b/NEWS > @@ -32,6 +32,7 @@ Post-v2.15.0 > * Enable the AVX512 DPCLS implementation to use VPOPCNT instruction if > the > CPU supports it. This enhances performance by using the native > vpopcount > instructions, instead of the emulated version of vpopcount. > + * Add command line option to switch between MFEX function pointers. > - ovs-ctl: > * New option '--no-record-hostname' to disable hostname configuration > in ovsdb on startup. > diff --git a/lib/automake.mk b/lib/automake.mk > index 3c9523c1a..53b8abc0f 100644 > --- a/lib/automake.mk > +++ b/lib/automake.mk > @@ -118,6 +118,8 @@ lib_libopenvswitch_la_SOURCES = \ > lib/dpif-netdev-private-dpcls.h \ > lib/dpif-netdev-private-dpif.c \ > lib/dpif-netdev-private-dpif.h \ > + lib/dpif-netdev-private-extract.c \ > + lib/dpif-netdev-private-extract.h \ > lib/dpif-netdev-private-flow.h \ > lib/dpif-netdev-private-thread.h \ > lib/dpif-netdev-private.h \ > diff --git a/lib/dpif-netdev-avx512.c b/lib/dpif-netdev-avx512.c > index 6f9aa8284..7772b7abf 100644 > --- a/lib/dpif-netdev-avx512.c > +++ b/lib/dpif-netdev-avx512.c > @@ -149,6 +149,15 @@ dp_netdev_input_outer_avx512(struct dp_netdev_pmd_thread > *pmd, > * // do all processing (HWOL->MFEX->EMC->SMC) > * } > */ > + > + /* Do a batch minfilow extract into keys. */ > + uint32_t mf_mask = 0; > + miniflow_extract_func mfex_func; > + atomic_read_relaxed(&pmd->miniflow_extract_opt, &mfex_func); > + if (mfex_func) { > + mf_mask = mfex_func(packets, keys, batch_size, in_port, pmd); > + } > + > uint32_t lookup_pkts_bitmask = (1ULL << batch_size) - 1; > uint32_t iter = lookup_pkts_bitmask; > while (iter) { > @@ -167,6 +176,13 @@ dp_netdev_input_outer_avx512(struct dp_netdev_pmd_thread > *pmd, > pkt_metadata_init(&packet->md, in_port); > > struct dp_netdev_flow *f = NULL; > + struct netdev_flow_key *key = &keys[i]; > + > + /* Check the minfiflow mask to see if the packet was correctly > + * classifed by vector mfex else do a scalar miniflow extract > + * for that packet. > + */ > + bool mfex_hit = !!(mf_mask & (1 << i)); > > /* Check for a partial hardware offload match. */ > if (hwol_enabled) { > @@ -177,7 +193,13 @@ dp_netdev_input_outer_avx512(struct dp_netdev_pmd_thread > *pmd, > } > if (f) { > rules[i] = &f->cr; > - pkt_meta[i].tcp_flags = parse_tcp_flags(packet); > + /* If AVX512 MFEX already classified the packet, use it. */ > + if (mfex_hit) { > + pkt_meta[i].tcp_flags = miniflow_get_tcp_flags(&key->mf); > + } else { > + pkt_meta[i].tcp_flags = parse_tcp_flags(packet); > + } > + > pkt_meta[i].bytes = dp_packet_size(packet); > phwol_hits++; > hwol_emc_smc_hitmask |= (1 << i); > @@ -185,9 +207,10 @@ dp_netdev_input_outer_avx512(struct dp_netdev_pmd_thread > *pmd, > } > } > > - /* Do miniflow extract into keys. */ > - struct netdev_flow_key *key = &keys[i]; > - miniflow_extract(packet, &key->mf); > + if (!mfex_hit) { > + /* Do a scalar miniflow extract into keys. */ > + miniflow_extract(packet, &key->mf); > + } > > /* Cache TCP and byte values for all packets. */ > pkt_meta[i].bytes = dp_packet_size(packet); > diff --git a/lib/dpif-netdev-private-extract.c > b/lib/dpif-netdev-private-extract.c > new file mode 100644 > index 000000000..f476069a6 > --- /dev/null > +++ b/lib/dpif-netdev-private-extract.c > @@ -0,0 +1,162 @@ > +/* > + * Copyright (c) 2021 Intel. > + * > + * Licensed under the Apache License, Version 2.0 (the "License"); > + * you may not use this file except in compliance with the License. > + * You may obtain a copy of the License at: > + * > + * http://www.apache.org/licenses/LICENSE-2.0 > + * > + * Unless required by applicable law or agreed to in writing, software > + * distributed under the License is distributed on an "AS IS" BASIS, > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. > + * See the License for the specific language governing permissions and > + * limitations under the License. > + */ > + > +#include <config.h> > +#include <errno.h> > +#include <stdint.h> > +#include <string.h> > + > +#include "dp-packet.h" > +#include "dpif-netdev-private-dpcls.h" > +#include "dpif-netdev-private-extract.h" > +#include "dpif-netdev-private-thread.h" > +#include "flow.h" > +#include "openvswitch/vlog.h" > +#include "ovs-thread.h" > +#include "util.h" > + > +VLOG_DEFINE_THIS_MODULE(dpif_netdev_extract); > + > +/* Variable to hold the default MFEX implementation. */ > +static miniflow_extract_func default_mfex_func = NULL; > + > +/* Implementations of available extract options and > + * the implementations are always in order of preference. > + */ > +static struct dpif_miniflow_extract_impl mfex_impls[] = { > + > + [MFEX_IMPL_SCALAR] = { > + .probe = NULL, > + .extract_func = NULL, > + .name = "scalar", }, > +}; > + > +BUILD_ASSERT_DECL(MFEX_IMPL_MAX == ARRAY_SIZE(mfex_impls)); > + > +void > +dpif_miniflow_extract_init(void) > +{ > + /* Call probe on each impl, and cache the result. */ > + for (int i = 0; i < MFEX_IMPL_MAX; i++) { > + bool avail = true; > + if (mfex_impls[i].probe) { > + /* Return zero is success, non-zero means error. */ > + avail = (mfex_impls[i].probe() == 0); > + } > + VLOG_INFO("Miniflow Extract implementation %s (available: %s)\n", > + mfex_impls[i].name, avail ? "available" : "not available"); > + mfex_impls[i].available = avail; > + } > +} > + > +miniflow_extract_func > +dp_mfex_impl_get_default(void) > +{ > + atomic_uintptr_t *mfex_func = (void *)&default_mfex_func; > + static bool default_mfex_func_set = false; > + int mfex_idx = MFEX_IMPL_SCALAR; > + > + /* For the first call, this will be choosen based on the > + * compile time flag and if nor flag is set it is set to > + * default scalar. > + */ > + if (OVS_UNLIKELY(!default_mfex_func_set)) { > + VLOG_INFO("Default MFEX implementation is %s.\n", > + mfex_impls[mfex_idx].name); > + atomic_store_relaxed(mfex_func, (uintptr_t) mfex_impls > + [mfex_idx].extract_func); > + default_mfex_func_set = true; If this only needs to be done once, why not move it to dpif_miniflow_extract_init() as suggested during the v6 review (in a later patch)? This will remove this check every time dp_mfex_impl_get_default() gets called. > + } > + > + return default_mfex_func; > +} > + > +int > +dp_mfex_impl_set_default_by_name(const char *name) > +{ > + miniflow_extract_func new_default; > + atomic_uintptr_t *mfex_func = (void *)&default_mfex_func; > + > + int err = dp_mfex_impl_get_by_name(name, &new_default); > + > + if (!err) { > + atomic_store_relaxed(mfex_func, (uintptr_t) new_default); > + } > + > + return err; > + > +} > + > +void > +dp_mfex_impl_get(struct ds *reply, struct dp_netdev_pmd_thread **pmd_list, > + size_t pmd_list_size) > +{ > + /* Add all MFEX functions to reply string. */ > + ds_put_cstr(reply, "Available MFEX implementations:\n"); > + > + for (int i = 0; i < MFEX_IMPL_MAX; i++) { > + ds_put_format(reply, " %s (available: %s pmds: ", > + mfex_impls[i].name, mfex_impls[i].available ? > + "True" : "False"); Flavio mentioned that True/False did not make sense to an end-user, not sure if he has the same feeling here? Maybe yes/no make more sense here? Flavio? > + > + for (size_t j = 0; j < pmd_list_size; j++) { > + struct dp_netdev_pmd_thread *pmd = pmd_list[j]; > + if (pmd->core_id == NON_PMD_CORE_ID) { > + continue; > + } > + > + if (pmd->miniflow_extract_opt == mfex_impls[i].extract_func) { > + ds_put_format(reply, "%u,", pmd->core_id); > + } > + } > + > + ds_chomp(reply, ','); > + > + if (ds_last(reply) == ' ') { > + ds_put_cstr(reply, "none"); > + } > + > + ds_put_cstr(reply, ")\n"); > + } > + > +} > + > +/* This function checks all available MFEX implementations, and selects and > + * returns the function pointer to the one requested by "name". If nothing > + * is found it reutrns error. reutrns -> returns > + */ > +int > +dp_mfex_impl_get_by_name(const char *name, miniflow_extract_func *out_func) > +{ > + if ((name == NULL) || (out_func == NULL)) { > + return -EINVAL; > + } > + > + for (int i = 0; i < MFEX_IMPL_MAX; i++) { > + if (strcmp(mfex_impls[i].name, name) == 0) { > + /* Check available is set before exec. */ > + if (!mfex_impls[i].available) { > + *out_func = NULL; > + return -ENODEV; > + } > + > + *out_func = mfex_impls[i].extract_func; > + return 0; > + } > + } > + > + return -ENOENT; > +} > diff --git a/lib/dpif-netdev-private-extract.h > b/lib/dpif-netdev-private-extract.h > new file mode 100644 > index 000000000..ddf2e2845 > --- /dev/null > +++ b/lib/dpif-netdev-private-extract.h > @@ -0,0 +1,111 @@ > +/* > + * Copyright (c) 2021 Intel. > + * > + * Licensed under the Apache License, Version 2.0 (the "License"); > + * you may not use this file except in compliance with the License. > + * You may obtain a copy of the License at: > + * > + * http://www.apache.org/licenses/LICENSE-2.0 > + * > + * Unless required by applicable law or agreed to in writing, software > + * distributed under the License is distributed on an "AS IS" BASIS, > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. > + * See the License for the specific language governing permissions and > + * limitations under the License. > + */ > + > +#ifndef MFEX_AVX512_EXTRACT > +#define MFEX_AVX512_EXTRACT 1 > + > +#include <sys/types.h> > + > +/* Forward declarations. */ > +struct dp_packet; > +struct miniflow; > +struct dp_netdev_pmd_thread; > +struct dp_packet_batch; > +struct netdev_flow_key; > + > +/* Function pointer prototype to be implemented in the optimized miniflow > + * extract code. > + * returns the hitmask of the processed packets on success. > + * returns zero on failure. > + */ > +typedef uint32_t (*miniflow_extract_func)(struct dp_packet_batch *batch, > + struct netdev_flow_key *keys, > + uint32_t keys_size, > + odp_port_t in_port, > + struct dp_netdev_pmd_thread > + *pmd_handle); > + > + > +/* The function pointer miniflow_extract_func depends on batch size. */ > +BUILD_ASSERT_DECL(NETDEV_MAX_BURST == 32); > + > +/* Probe function is used to detect if this CPU has the ISA required > + * to run the optimized miniflow implementation. > + * returns one on successful probe. > + * returns negative errno on failure. > + */ > +typedef int (*miniflow_extract_probe)(void); > + > +/* Structure representing the attributes of an optimized implementation. */ > +struct dpif_miniflow_extract_impl { > + /* When it is true, this impl has passed the probe() checks. */ > + bool available; > + > + /* Probe function is used to detect if this CPU has the ISA required > + * to run the optimized miniflow implementation. It is optional and > + * if it is not used, then it must be null. > + */ > + miniflow_extract_probe probe; > + > + /* Optional function to call to extract miniflows for a burst of packets. > + * If it is not used must be set to NULL; > + */ > + miniflow_extract_func extract_func; > + > + /* Name of the optimized implementation. */ > + char *name; > +}; > + > + > +/* Enum to hold implementation indexes. The list is traversed > + * linearly as from the ISA perspective, the VBMI version > + * should always come before the generic AVX512-F version. > + */ > +enum dpif_miniflow_extract_impl_idx { > + MFEX_IMPL_SCALAR, > + MFEX_IMPL_MAX > +}; > + > +/* This function returns all available implementations to the caller. The > + * quantity of implementations is returned by the int return value. > + */ > +void > +dp_mfex_impl_get(struct ds *reply, struct dp_netdev_pmd_thread **pmd_list, > + size_t pmd_list_size) OVS_REQUIRES(dp_netdev_mutex); > + > +/* This function checks all available MFEX implementations, and selects the > + * returns the function pointer to the one requested by "name". > + */ > +int > +dp_mfex_impl_get_by_name(const char *name, miniflow_extract_func *out_func); > + > +/* Returns the default MFEX which is first ./configure selected, but can be > + * overridden at runtime. */ > +miniflow_extract_func dp_mfex_impl_get_default(void); > + > +/* Overrides the default MFEX with the user set MFEX. */ > +int dp_mfex_impl_set_default_by_name(const char *name); > + > + > +/* Initializes the available miniflow extract implementations by probing for > + * the CPU ISA requirements. As the runtime available CPU ISA does not change > + * and the required ISA of the implementation also does not change, it is > safe > + * to cache the probe() results, and not call probe() at runtime. > + */ > +void > +dpif_miniflow_extract_init(void); > + > +#endif /* MFEX_AVX512_EXTRACT */ > diff --git a/lib/dpif-netdev-private-thread.h > b/lib/dpif-netdev-private-thread.h > index ba79c4a0a..a4c092b69 100644 > --- a/lib/dpif-netdev-private-thread.h > +++ b/lib/dpif-netdev-private-thread.h > @@ -27,6 +27,11 @@ > #include <stdint.h> > > #include "cmap.h" > + > +#include "dpif-netdev-private-dfc.h" > +#include "dpif-netdev-private-dpif.h" > +#include "dpif-netdev-perf.h" > +#include "dpif-netdev-private-extract.h" > #include "openvswitch/thread.h" > > #ifdef __cplusplus > @@ -110,6 +115,9 @@ struct dp_netdev_pmd_thread { > /* Pointer for per-DPIF implementation scratch space. */ > void *netdev_input_func_userdata; > > + /* Function pointer to call for miniflow_extract() functionality. */ > + ATOMIC(miniflow_extract_func) miniflow_extract_opt; > + > struct seq *reload_seq; > uint64_t last_reload_seq; > > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c > index 610949f36..b0eb8b186 100644 > --- a/lib/dpif-netdev.c > +++ b/lib/dpif-netdev.c > @@ -45,6 +45,7 @@ > #include "dpif.h" > #include "dpif-netdev-lookup.h" > #include "dpif-netdev-perf.h" > +#include "dpif-netdev-private-extract.h" > #include "dpif-provider.h" > #include "dummy.h" > #include "fat-rwlock.h" > @@ -1050,6 +1051,95 @@ dpif_netdev_impl_set(struct unixctl_conn *conn, int > argc OVS_UNUSED, > ds_destroy(&reply); > } > > +static void > +dpif_miniflow_extract_impl_get(struct unixctl_conn *conn, int argc > OVS_UNUSED, > + const char *argv[] OVS_UNUSED, > + void *aux OVS_UNUSED) > +{ > + struct ds reply = DS_EMPTY_INITIALIZER; > + struct shash_node *node; > + > + ovs_mutex_lock(&dp_netdev_mutex); > + SHASH_FOR_EACH (node, &dp_netdevs) { > + struct dp_netdev_pmd_thread **pmd_list; > + struct dp_netdev *dp = node->data; > + size_t n; > + > + /* Get PMD threads list, required to get the DPIF impl used by each > PMD > + * thread. */ > + sorted_poll_thread_list(dp, &pmd_list, &n); > + dp_mfex_impl_get(&reply, pmd_list, n); > + } > + ovs_mutex_unlock(&dp_netdev_mutex); > + unixctl_command_reply(conn, ds_cstr(&reply)); > + ds_destroy(&reply); > +} > + > +static void > +dpif_miniflow_extract_impl_set(struct unixctl_conn *conn, int argc, This is failing compilation, you need to mark argc as unused: +dpif_miniflow_extract_impl_set(struct unixctl_conn *conn, int argc OVS_UNUSED, > + const char *argv[], void *aux OVS_UNUSED) > +{ > + /* This function requires just one parameter, the miniflow name. > + */ > + const char *mfex_name = argv[1]; > + struct shash_node *node; > + > + ovs_mutex_lock(&dp_netdev_mutex); > + int err = dp_mfex_impl_set_default_by_name(mfex_name); Not sure if we need to lock, as it might not help in the dp_netdev_configure_pmd() case (see below), and we might need to convert this to an atomic get/set, and then the lock can be removed here (moved down). > + > + if (err) { > + ovs_mutex_unlock(&dp_netdev_mutex); > + struct ds reply = DS_EMPTY_INITIALIZER; > + char *error_desc = NULL; > + if (err == -EINVAL) { > + error_desc = "Unknown miniflow extract implementation:"; > + } else if (err == -ENOENT) { > + error_desc = "Miniflow extract implementation doesn't exist:"; > + } else if (err == -ENODEV) { > + error_desc = "Miniflow extract implementation not available:"; > + } else { > + error_desc = "Miniflow extract implementation Error:"; > + } > + ds_put_format(&reply, "%s %s.\n", error_desc, mfex_name); > + const char *reply_str = ds_cstr(&reply); > + unixctl_command_reply_error(conn, reply_str); > + VLOG_INFO("%s", reply_str); > + ds_destroy(&reply); > + return; > + } > + > + SHASH_FOR_EACH (node, &dp_netdevs) { > + struct dp_netdev_pmd_thread **pmd_list; > + struct dp_netdev *dp = node->data; > + size_t n; > + > + sorted_poll_thread_list(dp, &pmd_list, &n); > + miniflow_extract_func default_func = dp_mfex_impl_get_default(); > + > + for (size_t i = 0; i < n; i++) { > + struct dp_netdev_pmd_thread *pmd = pmd_list[i]; > + if (pmd->core_id == NON_PMD_CORE_ID) { > + continue; > + } > + > + /* Initialize MFEX function pointer to the newly configured > + * default. */ > + atomic_uintptr_t *pmd_func = (void *) &pmd->miniflow_extract_opt; > + atomic_store_relaxed(pmd_func, (uintptr_t) default_func); > + }; > + } > + > + ovs_mutex_unlock(&dp_netdev_mutex); > + > + /* Reply with success to command. */ > + struct ds reply = DS_EMPTY_INITIALIZER; > + ds_put_format(&reply, "Miniflow implementation set to %s.\n", mfex_name); You missed my previous comment, which you where supposed to change: “”” > In other places we use "Miniflow Extract implementation", so I think we should > use the same here, i.e. “Miniflow Extract implementation set to %s.”. Even > with > this we still use MFEX and "Miniflow Extract", maybe we should consolidate to > use either one? I think "Miniflow Extract" might be the best as MFEX might > confuse users? > Using Miniflow extract implementation as standard. “”” > + const char *reply_str = ds_cstr(&reply); > + unixctl_command_reply(conn, reply_str); > + VLOG_INFO("%s", reply_str); > + ds_destroy(&reply); > +} > + > static void > dpif_netdev_pmd_rebalance(struct unixctl_conn *conn, int argc, > const char *argv[], void *aux OVS_UNUSED) > @@ -1279,6 +1369,13 @@ dpif_netdev_init(void) > unixctl_command_register("dpif-netdev/dpif-impl-get", "", > 0, 0, dpif_netdev_impl_get, > NULL); > + unixctl_command_register("dpif-netdev/miniflow-parser-set", > + "miniflow_implementation_name", > + 1, 1, dpif_miniflow_extract_impl_set, > + NULL); > + unixctl_command_register("dpif-netdev/miniflow-parser-get", "", > + 0, 0, dpif_miniflow_extract_impl_get, > + NULL); > return 0; > } > > @@ -1480,6 +1577,8 @@ create_dp_netdev(const char *name, const struct > dpif_class *class, > > dp->conntrack = conntrack_init(); > > + dpif_miniflow_extract_init(); > + > atomic_init(&dp->emc_insert_min, DEFAULT_EM_FLOW_INSERT_MIN); > atomic_init(&dp->tx_flush_interval, DEFAULT_TX_FLUSH_INTERVAL); > > @@ -6222,6 +6321,11 @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread > *pmd, struct dp_netdev *dp, > atomic_uintptr_t *pmd_func = (void *) &pmd->netdev_input_func; > atomic_init(pmd_func, (uintptr_t) default_func); > > + /* Init default miniflow_extract function */ > + miniflow_extract_func mfex_func = dp_mfex_impl_get_default(); Not sure if the dp_netdev_configure_pmd() path holds the dp_netdev_mutex. I do not think it does, if it does not, we have to make sure the dp_mfex_impl_get/set APIs become atomic. > + atomic_uintptr_t *pmd_func_mfex = (void *)&pmd->miniflow_extract_opt; > + atomic_store_relaxed(pmd_func_mfex, (uintptr_t) mfex_func); > + > /* init the 'flow_cache' since there is no > * actual thread created for NON_PMD_CORE_ID. */ > if (core_id == NON_PMD_CORE_ID) { > @@ -6872,6 +6976,7 @@ dfc_processing(struct dp_netdev_pmd_thread *pmd, > } > > miniflow_extract(packet, &key->mf); > + Guess this newline can be removed. > key->len = 0; /* Not computed yet. */ > key->hash = > (md_is_valid == false) > -- > 2.25.1 _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev