Hi Toshiaki, Thanks for the patch. I have some questions inline.
On Thu, Jul 30, 2020 at 7:55 PM Toshiaki Makita <toshiaki.maki...@gmail.com> wrote: > > This provider offloads classifier to software XDP. > > It works only when a custom XDP object is loaded by afxdp netdev. > The BPF program needs to implement classifier with array-of-maps for > subtable hashmaps and arraymap for subtable masks. The flow api > provider detects classifier support in the custom XDP program when > loading it. > > The requirements for the BPF program is as below: > > - A struct named "meta_info" in ".ovs_meta" section > inform vswitchd of supported keys, actions, and the max number of > actions. > > - A map named "subtbl_masks" > An array which implements a list. The entry contains struct > xdp_subtables_mask provided by lib/netdev-offload-xdp.h followed by > miniflow buf of any length for the mask. > > - A map named "subtbl_masks_hd" > A one entry int array which contains the first entry index of the list > implemented by subtbl_masks. > > - A map named "flow_table" > An array-of-maps. Each entry points to subtable hash-map instantiated > with the following "subtbl_template". > The entry of each index of subtbl_masks has miniflow mask of subtable > in the corresponding index of flow_table. > > - A map named "subtbl_template" > A template for subtable, used for entries of "flow_table". > The key must be miniflow buf (without leading map). > > - A map named "output_map" > A devmap. Each entry represents odp port. > > - A map named "xsks_map" > A xskmap. Used for upcall. > > For more details, refer to the reference BPF program in next commit. > > In the future it may be possible to offload classifier to SmartNIC > through XDP, but currently it's not because map-in-map is not supported > by XDP hw-offload. > > Signed-off-by: Toshiaki Makita <toshiaki.maki...@gmail.com> > --- > acinclude.m4 | 25 + > configure.ac | 1 + > lib/automake.mk | 8 + > lib/bpf-util.c | 38 ++ > lib/bpf-util.h | 22 + > lib/netdev-afxdp.c | 218 +++++- > lib/netdev-afxdp.h | 3 + > lib/netdev-linux-private.h | 2 + > lib/netdev-offload-provider.h | 8 +- > lib/netdev-offload-xdp.c | 1213 +++++++++++++++++++++++++++++++++ > lib/netdev-offload-xdp.h | 49 ++ > lib/netdev-offload.c | 3 + > 12 files changed, 1588 insertions(+), 2 deletions(-) > create mode 100644 lib/bpf-util.c > create mode 100644 lib/bpf-util.h > create mode 100644 lib/netdev-offload-xdp.c > create mode 100644 lib/netdev-offload-xdp.h > > diff --git a/acinclude.m4 b/acinclude.m4 > index 4bac9dbdd..31ff0c013 100644 > --- a/acinclude.m4 > +++ b/acinclude.m4 > @@ -329,6 +329,31 @@ AC_DEFUN([OVS_CHECK_LINUX_AF_XDP], [ > AM_CONDITIONAL([HAVE_AF_XDP], test "$AF_XDP_ENABLE" = true) > ]) > > +dnl OVS_CHECK_LINUX_XDP_OFFLOAD > +dnl > +dnl Check both llvm and libbpf support > +AC_DEFUN([OVS_CHECK_LINUX_XDP_OFFLOAD], [ > + AC_ARG_ENABLE([xdp_offload], > + [AC_HELP_STRING([--enable-xdp-offload], > + [Compile XDP offload])], > + [], [enable_xdp_offload=no]) > + AC_MSG_CHECKING([whether XDP offload is enabled]) > + if test "$enable_xdp_offload" != yes; then > + AC_MSG_RESULT([no]) > + XDP_OFFLOAD_ENABLE=false > + else > + if test "$enable_afxdp" == no; then > + AC_MSG_ERROR([XDP offload depends on afxdp. Please add > --enable-afxdp.]) > + fi > + AC_MSG_RESULT([yes]) > + XDP_OFFLOAD_ENABLE=true > + > + AC_DEFINE([HAVE_XDP_OFFLOAD], [1], > + [Define to 1 if XDP offload compilation is available and > enabled.]) > + fi > + AM_CONDITIONAL([HAVE_XDP_OFFLOAD], test "$XDP_OFFLOAD_ENABLE" = true) > +]) > + > dnl OVS_CHECK_DPDK > dnl > dnl Configure DPDK source tree > diff --git a/configure.ac b/configure.ac > index 8d37af9db..530112c49 100644 > --- a/configure.ac > +++ b/configure.ac > @@ -99,6 +99,7 @@ OVS_CHECK_DOT > OVS_CHECK_IF_DL > OVS_CHECK_STRTOK_R > OVS_CHECK_LINUX_AF_XDP > +OVS_CHECK_LINUX_XDP_OFFLOAD > AC_CHECK_DECLS([sys_siglist], [], [], [[#include <signal.h>]]) > AC_CHECK_MEMBERS([struct stat.st_mtim.tv_nsec, struct stat.st_mtimensec], > [], [], [[#include <sys/stat.h>]]) > diff --git a/lib/automake.mk b/lib/automake.mk > index 920c958e3..9486b32e7 100644 > --- a/lib/automake.mk > +++ b/lib/automake.mk > @@ -451,6 +451,14 @@ lib_libopenvswitch_la_SOURCES += \ > lib/netdev-afxdp.h > endif > > +if HAVE_XDP_OFFLOAD > +lib_libopenvswitch_la_SOURCES += \ > + lib/bpf-util.c \ > + lib/bpf-util.h \ > + lib/netdev-offload-xdp.c \ > + lib/netdev-offload-xdp.h > +endif > + > if DPDK_NETDEV > lib_libopenvswitch_la_SOURCES += \ > lib/dpdk.c \ > diff --git a/lib/bpf-util.c b/lib/bpf-util.c > new file mode 100644 > index 000000000..324cfbe1d > --- /dev/null > +++ b/lib/bpf-util.c > @@ -0,0 +1,38 @@ > +/* > + * Copyright (c) 2020 NTT Corp. > + * > + * Licensed under the Apache License, Version 2.0 (the "License"); > + * you may not use this file except in compliance with the License. > + * You may obtain a copy of the License at: > + * > + * http://www.apache.org/licenses/LICENSE-2.0 > + * > + * Unless required by applicable law or agreed to in writing, software > + * distributed under the License is distributed on an "AS IS" BASIS, > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. > + * See the License for the specific language governing permissions and > + * limitations under the License. > + */ > + > +#include <config.h> > + > +#include "bpf-util.h" > + > +#include <bpf/libbpf.h> > + > +#include "ovs-thread.h" > + > +DEFINE_STATIC_PER_THREAD_DATA(struct { char s[128]; }, > + libbpf_strerror_buffer, > + { "" }); > + > +const char * > +ovs_libbpf_strerror(int err) > +{ > + enum { BUFSIZE = sizeof libbpf_strerror_buffer_get()->s }; > + char *buf = libbpf_strerror_buffer_get()->s; > + > + libbpf_strerror(err, buf, BUFSIZE); > + > + return buf; > +} > diff --git a/lib/bpf-util.h b/lib/bpf-util.h > new file mode 100644 > index 000000000..6346935b3 > --- /dev/null > +++ b/lib/bpf-util.h > @@ -0,0 +1,22 @@ > +/* > + * Copyright (c) 2020 NTT Corp. > + * > + * Licensed under the Apache License, Version 2.0 (the "License"); > + * you may not use this file except in compliance with the License. > + * You may obtain a copy of the License at: > + * > + * http://www.apache.org/licenses/LICENSE-2.0 > + * > + * Unless required by applicable law or agreed to in writing, software > + * distributed under the License is distributed on an "AS IS" BASIS, > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. > + * See the License for the specific language governing permissions and > + * limitations under the License. > + */ > + > +#ifndef BPF_UTIL_H > +#define BPF_UTIL_H 1 > + > +const char *ovs_libbpf_strerror(int err); > + > +#endif /* bpf-util.h */ > diff --git a/lib/netdev-afxdp.c b/lib/netdev-afxdp.c > index e1374ccbb..00204f299 100644 > --- a/lib/netdev-afxdp.c > +++ b/lib/netdev-afxdp.c > @@ -22,6 +22,7 @@ > #include "netdev-afxdp-pool.h" > > #include <bpf/bpf.h> > +#include <bpf/btf.h> > #include <errno.h> > #include <inttypes.h> > #include <linux/rtnetlink.h> > @@ -37,10 +38,13 @@ > #include <sys/types.h> > #include <unistd.h> > > +#include "bpf-util.h" > #include "coverage.h" > #include "dp-packet.h" > #include "dpif-netdev.h" > #include "fatal-signal.h" > +#include "netdev-offload-provider.h" > +#include "netdev-offload-xdp.h" > #include "openvswitch/compiler.h" > #include "openvswitch/dynamic-string.h" > #include "openvswitch/list.h" > @@ -261,10 +265,204 @@ netdev_afxdp_sweep_unused_pools(void *aux OVS_UNUSED) > ovs_mutex_unlock(&unused_pools_mutex); > } > > +bool > +has_xdp_flowtable(struct netdev *netdev) > +{ > + struct netdev_linux *dev = netdev_linux_cast(netdev); > + > + return dev->has_xdp_flowtable; > +} > + > +struct bpf_object * > +get_xdp_object(struct netdev *netdev) > +{ > + struct netdev_linux *dev = netdev_linux_cast(netdev); > + > + return dev->xdp_obj; > +} > + > +#ifdef HAVE_XDP_OFFLOAD > +static int > +xdp_preload(struct netdev *netdev, struct bpf_object *obj) > +{ > + static struct ovsthread_once output_map_once = > OVSTHREAD_ONCE_INITIALIZER; > + struct btf *btf; > + struct bpf_map *flow_table, *subtbl_template, *subtbl_masks_hd, > *xsks_map, > + *output_map; > + const struct bpf_map_def *flow_table_def, *subtbl_def; > + int flow_table_fd, subtbl_meta_fd, xsks_map_fd; > + static int output_map_fd = -1; > + int n_rxq, err; > + > + btf = bpf_object__btf(obj); > + if (!btf) { > + VLOG_DBG("BPF object for netdev \"%s\" does not contain BTF", > + netdev_get_name(netdev)); > + return EOPNOTSUPP; > + } > + if (btf__find_by_name_kind(btf, ".ovs_meta", BTF_KIND_DATASEC) < 0) { > + VLOG_DBG("\".ovs_meta\" datasec not found in BTF"); > + return EOPNOTSUPP; > + } > + > + flow_table = bpf_object__find_map_by_name(obj, "flow_table"); > + if (!flow_table) { > + VLOG_DBG("%s: \"flow_table\" map does not exist", > + netdev_get_name(netdev)); > + return EOPNOTSUPP; > + } > + > + subtbl_template = bpf_object__find_map_by_name(obj, "subtbl_template"); > + if (!subtbl_template) { > + VLOG_DBG("%s: \"subtbl_template\" map does not exist", > + netdev_get_name(netdev)); > + return EOPNOTSUPP; > + } > + > + output_map = bpf_object__find_map_by_name(obj, "output_map"); > + if (!output_map) { > + VLOG_DBG("%s: \"output_map\" map does not exist", > + netdev_get_name(netdev)); > + return EOPNOTSUPP; > + } > + > + if (ovsthread_once_start(&output_map_once)) { > + output_map_fd = bpf_create_map_name(BPF_MAP_TYPE_DEVMAP, > "output_map", > + sizeof(int), sizeof(int), > + XDP_MAX_PORTS, 0); > + if (output_map_fd < 0) { > + err = errno; > + VLOG_WARN("%s: Map creation for output_map failed: %s", > + netdev_get_name(netdev), ovs_strerror(errno)); > + ovsthread_once_done(&output_map_once); > + return err; > + } > + ovsthread_once_done(&output_map_once); > + } > + > + flow_table_def = bpf_map__def(flow_table); > + if (flow_table_def->type != BPF_MAP_TYPE_ARRAY_OF_MAPS) { > + VLOG_WARN("%s: \"flow_table\" map type is not map-in-map array.", > + netdev_get_name(netdev)); > + return EINVAL; > + } IIUC, for example flow_table, here we tried to read its definition from the object file (without loading it), and create a map, get its fd. > + > + subtbl_def = bpf_map__def(subtbl_template); > + if (subtbl_def->type != BPF_MAP_TYPE_HASH) { > + VLOG_WARN("%s: \"subtbl_templates\" map type is not hash-table", > + netdev_get_name(netdev)); > + return EINVAL; > + } > + > + subtbl_meta_fd = bpf_create_map(BPF_MAP_TYPE_HASH, subtbl_def->key_size, > + subtbl_def->value_size, > + subtbl_def->max_entries, 0); > + if (subtbl_meta_fd < 0) { > + err = errno; > + VLOG_WARN("%s: Map creation for flow_table meta table failed: %s", > + netdev_get_name(netdev), ovs_strerror(errno)); > + return err; > + } > + flow_table_fd = bpf_create_map_in_map(BPF_MAP_TYPE_ARRAY_OF_MAPS, > + "flow_table", sizeof(uint32_t), > + subtbl_meta_fd, > + flow_table_def->max_entries, > + 0); > + if (flow_table_fd < 0) { > + err = errno; > + VLOG_WARN("%s: Map creation for flow_table failed: %s", > + netdev_get_name(netdev), ovs_strerror(errno)); > + close(subtbl_meta_fd); > + return err; > + } > + close(subtbl_meta_fd); > + > + err = bpf_map__reuse_fd(flow_table, flow_table_fd); > + if (err) { > + VLOG_WARN("%s: Failed to reuse flow_table fd: %s", > + netdev_get_name(netdev), ovs_libbpf_strerror(err)); > + close(flow_table_fd); > + return EINVAL; > + } > + close(flow_table_fd); Question: What's the purpose of bpf_map__reuse_fd and why do we close the flow_table_fd. I though later on when we call "bpf_object__load(obj)", all the maps will be loaded? > + > + subtbl_masks_hd = bpf_object__find_map_by_name(obj, "subtbl_masks_hd"); > + if (!subtbl_masks_hd) { > + /* Head index can be a global variable. In that case libbpf will > + * initialize it. */ > + VLOG_DBG("%s: \"subtbl_masks_hd\" map does not exist", > + netdev_get_name(netdev)); > + } else { > + int head_fd, head = XDP_SUBTABLES_TAIL, zero = 0; > + > + head_fd = bpf_create_map_name(BPF_MAP_TYPE_ARRAY, "subtbl_masks_hd", > + sizeof(uint32_t), sizeof(int), 1, 0); > + if (head_fd < 0) { > + err = errno; > + VLOG_ERR("%s: Map creation of \"subtbl_masks_hd\" failed: %s", > + netdev_get_name(netdev), ovs_strerror(errno)); > + return err; > + } > + > + if (bpf_map_update_elem(head_fd, &zero, &head, 0)) { > + err = errno; > + VLOG_ERR("Cannot update subtbl_masks_hd: %s", > + ovs_strerror(errno)); > + return err; > + } > + > + err = bpf_map__reuse_fd(subtbl_masks_hd, head_fd); > + if (err) { > + VLOG_ERR("%s: Failed to reuse subtbl_masks_hd fd: %s", > + netdev_get_name(netdev), ovs_libbpf_strerror(err)); > + close(head_fd); > + return EINVAL; > + } > + close(head_fd); > + } > + > + xsks_map = bpf_object__find_map_by_name(obj, "xsks_map"); > + if (!xsks_map) { > + VLOG_ERR("%s: BUG: \"xsks_map\" map does not exist", > + netdev_get_name(netdev)); > + return EOPNOTSUPP; > + } > + > + n_rxq = netdev_n_rxq(netdev); > + xsks_map_fd = bpf_create_map_name(BPF_MAP_TYPE_XSKMAP, "xsks_map", > + sizeof(int), sizeof(int), n_rxq, 0); > + if (xsks_map_fd < 0) { > + err = errno; > + VLOG_WARN("%s: Map creation for xsks_map failed: %s", > + netdev_get_name(netdev), ovs_strerror(errno)); > + return err; > + } > + > + err = bpf_map__reuse_fd(xsks_map, xsks_map_fd); > + if (err) { > + VLOG_WARN("%s: Failed to reuse xsks_map fd: %s", > + netdev_get_name(netdev), ovs_libbpf_strerror(err)); > + close(xsks_map_fd); > + return EINVAL; > + } > + close(xsks_map_fd); > + > + err = bpf_map__reuse_fd(output_map, output_map_fd); > + if (err) { > + VLOG_WARN("%s: Failed to reuse output_map fd: %s", > + netdev_get_name(netdev), ovs_libbpf_strerror(err)); > + return EINVAL; > + } Do you need to close(output_map_fd)? > + > + return 0; > +} > +#endif > + > static int > xsk_load_prog(struct netdev *netdev, const char *path, > struct bpf_object **pobj, int *prog_fd) > { > + struct netdev_linux *dev OVS_UNUSED = netdev_linux_cast(netdev); > struct bpf_object_open_attr attr = { > .prog_type = BPF_PROG_TYPE_XDP, > .file = path, > @@ -298,6 +496,14 @@ xsk_load_prog(struct netdev *netdev, const char *path, > goto err; > } > > +#ifdef HAVE_XDP_OFFLOAD > + if (!xdp_preload(netdev, obj)) { I forgot what's the purpose of doing xdp_preload, before we call bpf_object__load below. Does it just to check whether the flowtable_afxdp.o has every map we want? Or it does initialize something else. > + VLOG_INFO("%s: Detected flowtable support in XDP program", > + netdev_get_name(netdev)); > + dev->has_xdp_flowtable = true; > + } > +#endif > + > if (bpf_object__load(obj)) { > VLOG_ERR("%s: Can't load XDP program at '%s'", > netdev_get_name(netdev), path); > @@ -1297,7 +1503,17 @@ libbpf_print(enum libbpf_print_level level, > > int netdev_afxdp_init(void) > { > - libbpf_set_print(libbpf_print); > + static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; > + > + if (ovsthread_once_start(&once)) { > + libbpf_set_print(libbpf_print); > +#ifdef HAVE_XDP_OFFLOAD > + if (netdev_register_flow_api_provider(&netdev_offload_xdp)) { > + VLOG_WARN("Failed to register XDP flow api provider"); > + } > +#endif > + ovsthread_once_done(&once); > + } > return 0; > } > > diff --git a/lib/netdev-afxdp.h b/lib/netdev-afxdp.h > index e91cd102d..324152e8f 100644 > --- a/lib/netdev-afxdp.h > +++ b/lib/netdev-afxdp.h > @@ -44,6 +44,7 @@ struct netdev_stats; > struct smap; > struct xdp_umem; > struct xsk_socket_info; > +struct bpf_object; > > int netdev_afxdp_rxq_construct(struct netdev_rxq *rxq_); > void netdev_afxdp_rxq_destruct(struct netdev_rxq *rxq_); > @@ -70,6 +71,8 @@ int netdev_afxdp_get_custom_stats(const struct netdev > *netdev, > void free_afxdp_buf(struct dp_packet *p); > int netdev_afxdp_reconfigure(struct netdev *netdev); > void signal_remove_xdp(struct netdev *netdev); > +bool has_xdp_flowtable(struct netdev *netdev); > +struct bpf_object *get_xdp_object(struct netdev *netdev); > > #else /* !HAVE_AF_XDP */ > Thanks William _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev