Hi Toshiaki,

Thanks for the patch. I have some questions inline.

On Thu, Jul 30, 2020 at 7:55 PM Toshiaki Makita
<toshiaki.maki...@gmail.com> wrote:
>
> This provider offloads classifier to software XDP.
>
> It works only when a custom XDP object is loaded by afxdp netdev.
> The BPF program needs to implement classifier with array-of-maps for
> subtable hashmaps and arraymap for subtable masks. The flow api
> provider detects classifier support in the custom XDP program when
> loading it.
>
> The requirements for the BPF program is as below:
>
> - A struct named "meta_info" in ".ovs_meta" section
>   inform vswitchd of supported keys, actions, and the max number of
>   actions.
>
> - A map named "subtbl_masks"
>   An array which implements a list. The entry contains struct
>   xdp_subtables_mask provided by lib/netdev-offload-xdp.h followed by
>   miniflow buf of any length for the mask.
>
> - A map named "subtbl_masks_hd"
>   A one entry int array which contains the first entry index of the list
>   implemented by subtbl_masks.
>
> - A map named "flow_table"
>   An array-of-maps. Each entry points to subtable hash-map instantiated
>   with the following "subtbl_template".
>   The entry of each index of subtbl_masks has miniflow mask of subtable
>   in the corresponding index of flow_table.
>
> - A map named "subtbl_template"
>   A template for subtable, used for entries of "flow_table".
>   The key must be miniflow buf (without leading map).
>
> - A map named "output_map"
>   A devmap. Each entry represents odp port.
>
> - A map named "xsks_map"
>   A xskmap. Used for upcall.
>
> For more details, refer to the reference BPF program in next commit.
>
> In the future it may be possible to offload classifier to SmartNIC
> through XDP, but currently it's not because map-in-map is not supported
> by XDP hw-offload.
>
> Signed-off-by: Toshiaki Makita <toshiaki.maki...@gmail.com>
> ---
>  acinclude.m4                  |   25 +
>  configure.ac                  |    1 +
>  lib/automake.mk               |    8 +
>  lib/bpf-util.c                |   38 ++
>  lib/bpf-util.h                |   22 +
>  lib/netdev-afxdp.c            |  218 +++++-
>  lib/netdev-afxdp.h            |    3 +
>  lib/netdev-linux-private.h    |    2 +
>  lib/netdev-offload-provider.h |    8 +-
>  lib/netdev-offload-xdp.c      | 1213 +++++++++++++++++++++++++++++++++
>  lib/netdev-offload-xdp.h      |   49 ++
>  lib/netdev-offload.c          |    3 +
>  12 files changed, 1588 insertions(+), 2 deletions(-)
>  create mode 100644 lib/bpf-util.c
>  create mode 100644 lib/bpf-util.h
>  create mode 100644 lib/netdev-offload-xdp.c
>  create mode 100644 lib/netdev-offload-xdp.h
>
> diff --git a/acinclude.m4 b/acinclude.m4
> index 4bac9dbdd..31ff0c013 100644
> --- a/acinclude.m4
> +++ b/acinclude.m4
> @@ -329,6 +329,31 @@ AC_DEFUN([OVS_CHECK_LINUX_AF_XDP], [
>    AM_CONDITIONAL([HAVE_AF_XDP], test "$AF_XDP_ENABLE" = true)
>  ])
>
> +dnl OVS_CHECK_LINUX_XDP_OFFLOAD
> +dnl
> +dnl Check both llvm and libbpf support
> +AC_DEFUN([OVS_CHECK_LINUX_XDP_OFFLOAD], [
> +  AC_ARG_ENABLE([xdp_offload],
> +                [AC_HELP_STRING([--enable-xdp-offload],
> +                                [Compile XDP offload])],
> +                [], [enable_xdp_offload=no])
> +  AC_MSG_CHECKING([whether XDP offload is enabled])
> +  if test "$enable_xdp_offload" != yes; then
> +    AC_MSG_RESULT([no])
> +    XDP_OFFLOAD_ENABLE=false
> +  else
> +    if test "$enable_afxdp" == no; then
> +      AC_MSG_ERROR([XDP offload depends on afxdp. Please add 
> --enable-afxdp.])
> +    fi
> +    AC_MSG_RESULT([yes])
> +    XDP_OFFLOAD_ENABLE=true
> +
> +    AC_DEFINE([HAVE_XDP_OFFLOAD], [1],
> +              [Define to 1 if XDP offload compilation is available and 
> enabled.])
> +  fi
> +  AM_CONDITIONAL([HAVE_XDP_OFFLOAD], test "$XDP_OFFLOAD_ENABLE" = true)
> +])
> +
>  dnl OVS_CHECK_DPDK
>  dnl
>  dnl Configure DPDK source tree
> diff --git a/configure.ac b/configure.ac
> index 8d37af9db..530112c49 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -99,6 +99,7 @@ OVS_CHECK_DOT
>  OVS_CHECK_IF_DL
>  OVS_CHECK_STRTOK_R
>  OVS_CHECK_LINUX_AF_XDP
> +OVS_CHECK_LINUX_XDP_OFFLOAD
>  AC_CHECK_DECLS([sys_siglist], [], [], [[#include <signal.h>]])
>  AC_CHECK_MEMBERS([struct stat.st_mtim.tv_nsec, struct stat.st_mtimensec],
>    [], [], [[#include <sys/stat.h>]])
> diff --git a/lib/automake.mk b/lib/automake.mk
> index 920c958e3..9486b32e7 100644
> --- a/lib/automake.mk
> +++ b/lib/automake.mk
> @@ -451,6 +451,14 @@ lib_libopenvswitch_la_SOURCES += \
>         lib/netdev-afxdp.h
>  endif
>
> +if HAVE_XDP_OFFLOAD
> +lib_libopenvswitch_la_SOURCES += \
> +       lib/bpf-util.c \
> +       lib/bpf-util.h \
> +       lib/netdev-offload-xdp.c \
> +       lib/netdev-offload-xdp.h
> +endif
> +
>  if DPDK_NETDEV
>  lib_libopenvswitch_la_SOURCES += \
>         lib/dpdk.c \
> diff --git a/lib/bpf-util.c b/lib/bpf-util.c
> new file mode 100644
> index 000000000..324cfbe1d
> --- /dev/null
> +++ b/lib/bpf-util.c
> @@ -0,0 +1,38 @@
> +/*
> + * Copyright (c) 2020 NTT Corp.
> + *
> + * Licensed under the Apache License, Version 2.0 (the "License");
> + * you may not use this file except in compliance with the License.
> + * You may obtain a copy of the License at:
> + *
> + *     http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +
> +#include <config.h>
> +
> +#include "bpf-util.h"
> +
> +#include <bpf/libbpf.h>
> +
> +#include "ovs-thread.h"
> +
> +DEFINE_STATIC_PER_THREAD_DATA(struct { char s[128]; },
> +                              libbpf_strerror_buffer,
> +                              { "" });
> +
> +const char *
> +ovs_libbpf_strerror(int err)
> +{
> +    enum { BUFSIZE = sizeof libbpf_strerror_buffer_get()->s };
> +    char *buf = libbpf_strerror_buffer_get()->s;
> +
> +    libbpf_strerror(err, buf, BUFSIZE);
> +
> +    return buf;
> +}
> diff --git a/lib/bpf-util.h b/lib/bpf-util.h
> new file mode 100644
> index 000000000..6346935b3
> --- /dev/null
> +++ b/lib/bpf-util.h
> @@ -0,0 +1,22 @@
> +/*
> + * Copyright (c) 2020 NTT Corp.
> + *
> + * Licensed under the Apache License, Version 2.0 (the "License");
> + * you may not use this file except in compliance with the License.
> + * You may obtain a copy of the License at:
> + *
> + *     http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +
> +#ifndef BPF_UTIL_H
> +#define BPF_UTIL_H 1
> +
> +const char *ovs_libbpf_strerror(int err);
> +
> +#endif /* bpf-util.h */
> diff --git a/lib/netdev-afxdp.c b/lib/netdev-afxdp.c
> index e1374ccbb..00204f299 100644
> --- a/lib/netdev-afxdp.c
> +++ b/lib/netdev-afxdp.c
> @@ -22,6 +22,7 @@
>  #include "netdev-afxdp-pool.h"
>
>  #include <bpf/bpf.h>
> +#include <bpf/btf.h>
>  #include <errno.h>
>  #include <inttypes.h>
>  #include <linux/rtnetlink.h>
> @@ -37,10 +38,13 @@
>  #include <sys/types.h>
>  #include <unistd.h>
>
> +#include "bpf-util.h"
>  #include "coverage.h"
>  #include "dp-packet.h"
>  #include "dpif-netdev.h"
>  #include "fatal-signal.h"
> +#include "netdev-offload-provider.h"
> +#include "netdev-offload-xdp.h"
>  #include "openvswitch/compiler.h"
>  #include "openvswitch/dynamic-string.h"
>  #include "openvswitch/list.h"
> @@ -261,10 +265,204 @@ netdev_afxdp_sweep_unused_pools(void *aux OVS_UNUSED)
>      ovs_mutex_unlock(&unused_pools_mutex);
>  }
>
> +bool
> +has_xdp_flowtable(struct netdev *netdev)
> +{
> +    struct netdev_linux *dev = netdev_linux_cast(netdev);
> +
> +    return dev->has_xdp_flowtable;
> +}
> +
> +struct bpf_object *
> +get_xdp_object(struct netdev *netdev)
> +{
> +    struct netdev_linux *dev = netdev_linux_cast(netdev);
> +
> +    return dev->xdp_obj;
> +}
> +
> +#ifdef HAVE_XDP_OFFLOAD
> +static int
> +xdp_preload(struct netdev *netdev, struct bpf_object *obj)
> +{
> +    static struct ovsthread_once output_map_once = 
> OVSTHREAD_ONCE_INITIALIZER;
> +    struct btf *btf;
> +    struct bpf_map *flow_table, *subtbl_template, *subtbl_masks_hd, 
> *xsks_map,
> +                   *output_map;
> +    const struct bpf_map_def *flow_table_def, *subtbl_def;
> +    int flow_table_fd, subtbl_meta_fd, xsks_map_fd;
> +    static int output_map_fd = -1;
> +    int n_rxq, err;
> +
> +    btf = bpf_object__btf(obj);
> +    if (!btf) {
> +        VLOG_DBG("BPF object for netdev \"%s\" does not contain BTF",
> +                 netdev_get_name(netdev));
> +        return EOPNOTSUPP;
> +    }
> +    if (btf__find_by_name_kind(btf, ".ovs_meta", BTF_KIND_DATASEC) < 0) {
> +        VLOG_DBG("\".ovs_meta\" datasec not found in BTF");
> +        return EOPNOTSUPP;
> +    }
> +
> +    flow_table = bpf_object__find_map_by_name(obj, "flow_table");
> +    if (!flow_table) {
> +        VLOG_DBG("%s: \"flow_table\" map does not exist",
> +                 netdev_get_name(netdev));
> +        return EOPNOTSUPP;
> +    }
> +
> +    subtbl_template = bpf_object__find_map_by_name(obj, "subtbl_template");
> +    if (!subtbl_template) {
> +        VLOG_DBG("%s: \"subtbl_template\" map does not exist",
> +                 netdev_get_name(netdev));
> +        return EOPNOTSUPP;
> +    }
> +
> +    output_map = bpf_object__find_map_by_name(obj, "output_map");
> +    if (!output_map) {
> +        VLOG_DBG("%s: \"output_map\" map does not exist",
> +                 netdev_get_name(netdev));
> +        return EOPNOTSUPP;
> +    }
> +
> +    if (ovsthread_once_start(&output_map_once)) {
> +        output_map_fd = bpf_create_map_name(BPF_MAP_TYPE_DEVMAP, 
> "output_map",
> +                                            sizeof(int), sizeof(int),
> +                                            XDP_MAX_PORTS, 0);
> +        if (output_map_fd < 0) {
> +            err = errno;
> +            VLOG_WARN("%s: Map creation for output_map failed: %s",
> +                      netdev_get_name(netdev), ovs_strerror(errno));
> +            ovsthread_once_done(&output_map_once);
> +            return err;
> +        }
> +        ovsthread_once_done(&output_map_once);
> +    }
> +
> +    flow_table_def = bpf_map__def(flow_table);
> +    if (flow_table_def->type != BPF_MAP_TYPE_ARRAY_OF_MAPS) {
> +        VLOG_WARN("%s: \"flow_table\" map type is not map-in-map array.",
> +                  netdev_get_name(netdev));
> +        return EINVAL;
> +    }

IIUC, for example flow_table, here we tried to read its definition from the
object file (without loading it), and create a map, get its fd.

> +
> +    subtbl_def = bpf_map__def(subtbl_template);
> +    if (subtbl_def->type != BPF_MAP_TYPE_HASH) {
> +        VLOG_WARN("%s: \"subtbl_templates\" map type is not hash-table",
> +                  netdev_get_name(netdev));
> +        return EINVAL;
> +    }
> +
> +    subtbl_meta_fd = bpf_create_map(BPF_MAP_TYPE_HASH, subtbl_def->key_size,
> +                                    subtbl_def->value_size,
> +                                    subtbl_def->max_entries, 0);
> +    if (subtbl_meta_fd < 0) {
> +        err = errno;
> +        VLOG_WARN("%s: Map creation for flow_table meta table failed: %s",
> +                  netdev_get_name(netdev), ovs_strerror(errno));
> +        return err;
> +    }
> +    flow_table_fd = bpf_create_map_in_map(BPF_MAP_TYPE_ARRAY_OF_MAPS,
> +                                          "flow_table", sizeof(uint32_t),
> +                                          subtbl_meta_fd,
> +                                          flow_table_def->max_entries,
> +                                          0);
> +    if (flow_table_fd < 0) {
> +        err = errno;
> +        VLOG_WARN("%s: Map creation for flow_table failed: %s",
> +                  netdev_get_name(netdev), ovs_strerror(errno));
> +        close(subtbl_meta_fd);
> +        return err;
> +    }
> +    close(subtbl_meta_fd);
> +
> +    err = bpf_map__reuse_fd(flow_table, flow_table_fd);
> +    if (err) {
> +        VLOG_WARN("%s: Failed to reuse flow_table fd: %s",
> +                  netdev_get_name(netdev), ovs_libbpf_strerror(err));
> +        close(flow_table_fd);
> +        return EINVAL;
> +    }
> +    close(flow_table_fd);

Question:
What's the purpose of bpf_map__reuse_fd and why do we close the flow_table_fd.
I though later on when we call "bpf_object__load(obj)", all the maps
will be loaded?

> +
> +    subtbl_masks_hd = bpf_object__find_map_by_name(obj, "subtbl_masks_hd");
> +    if (!subtbl_masks_hd) {
> +        /* Head index can be a global variable. In that case libbpf will
> +         * initialize it. */
> +        VLOG_DBG("%s: \"subtbl_masks_hd\" map does not exist",
> +                 netdev_get_name(netdev));
> +    } else {
> +        int head_fd, head = XDP_SUBTABLES_TAIL, zero = 0;
> +
> +        head_fd = bpf_create_map_name(BPF_MAP_TYPE_ARRAY, "subtbl_masks_hd",
> +                                      sizeof(uint32_t), sizeof(int), 1, 0);
> +        if (head_fd < 0) {
> +            err = errno;
> +            VLOG_ERR("%s: Map creation of \"subtbl_masks_hd\" failed: %s",
> +                     netdev_get_name(netdev), ovs_strerror(errno));
> +            return err;
> +        }
> +
> +        if (bpf_map_update_elem(head_fd, &zero, &head, 0)) {
> +            err = errno;
> +            VLOG_ERR("Cannot update subtbl_masks_hd: %s",
> +                     ovs_strerror(errno));
> +            return err;
> +        }
> +
> +        err = bpf_map__reuse_fd(subtbl_masks_hd, head_fd);
> +        if (err) {
> +            VLOG_ERR("%s: Failed to reuse subtbl_masks_hd fd: %s",
> +                     netdev_get_name(netdev), ovs_libbpf_strerror(err));
> +            close(head_fd);
> +            return EINVAL;
> +        }
> +        close(head_fd);
> +    }
> +
> +    xsks_map = bpf_object__find_map_by_name(obj, "xsks_map");
> +    if (!xsks_map) {
> +        VLOG_ERR("%s: BUG: \"xsks_map\" map does not exist",
> +                 netdev_get_name(netdev));
> +        return EOPNOTSUPP;
> +    }
> +
> +    n_rxq = netdev_n_rxq(netdev);
> +    xsks_map_fd = bpf_create_map_name(BPF_MAP_TYPE_XSKMAP, "xsks_map",
> +                                      sizeof(int), sizeof(int), n_rxq, 0);
> +    if (xsks_map_fd < 0) {
> +        err = errno;
> +        VLOG_WARN("%s: Map creation for xsks_map failed: %s",
> +                  netdev_get_name(netdev), ovs_strerror(errno));
> +        return err;
> +    }
> +
> +    err = bpf_map__reuse_fd(xsks_map, xsks_map_fd);
> +    if (err) {
> +        VLOG_WARN("%s: Failed to reuse xsks_map fd: %s",
> +                  netdev_get_name(netdev), ovs_libbpf_strerror(err));
> +        close(xsks_map_fd);
> +        return EINVAL;
> +    }
> +    close(xsks_map_fd);
> +
> +    err = bpf_map__reuse_fd(output_map, output_map_fd);
> +    if (err) {
> +        VLOG_WARN("%s: Failed to reuse output_map fd: %s",
> +                  netdev_get_name(netdev), ovs_libbpf_strerror(err));
> +        return EINVAL;
> +    }

Do you need to close(output_map_fd)?

> +
> +    return 0;
> +}
> +#endif
> +
>  static int
>  xsk_load_prog(struct netdev *netdev, const char *path,
>                struct bpf_object **pobj, int *prog_fd)
>  {
> +    struct netdev_linux *dev OVS_UNUSED = netdev_linux_cast(netdev);
>      struct bpf_object_open_attr attr = {
>          .prog_type = BPF_PROG_TYPE_XDP,
>          .file = path,
> @@ -298,6 +496,14 @@ xsk_load_prog(struct netdev *netdev, const char *path,
>          goto err;
>      }
>
> +#ifdef HAVE_XDP_OFFLOAD
> +    if (!xdp_preload(netdev, obj)) {

I forgot what's the purpose of doing xdp_preload, before we call
bpf_object__load below.
Does it just to check whether the flowtable_afxdp.o has every map we want?
Or it does initialize something else.

> +        VLOG_INFO("%s: Detected flowtable support in XDP program",
> +                  netdev_get_name(netdev));
> +        dev->has_xdp_flowtable = true;
> +    }
> +#endif
> +
>      if (bpf_object__load(obj)) {
>          VLOG_ERR("%s: Can't load XDP program at '%s'",
>                   netdev_get_name(netdev), path);
> @@ -1297,7 +1503,17 @@ libbpf_print(enum libbpf_print_level level,
>
>  int netdev_afxdp_init(void)
>  {
> -    libbpf_set_print(libbpf_print);
> +    static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
> +
> +    if (ovsthread_once_start(&once)) {
> +        libbpf_set_print(libbpf_print);
> +#ifdef HAVE_XDP_OFFLOAD
> +        if (netdev_register_flow_api_provider(&netdev_offload_xdp)) {
> +            VLOG_WARN("Failed to register XDP flow api provider");
> +        }
> +#endif
> +        ovsthread_once_done(&once);
> +    }
>      return 0;
>  }
>
> diff --git a/lib/netdev-afxdp.h b/lib/netdev-afxdp.h
> index e91cd102d..324152e8f 100644
> --- a/lib/netdev-afxdp.h
> +++ b/lib/netdev-afxdp.h
> @@ -44,6 +44,7 @@ struct netdev_stats;
>  struct smap;
>  struct xdp_umem;
>  struct xsk_socket_info;
> +struct bpf_object;
>
>  int netdev_afxdp_rxq_construct(struct netdev_rxq *rxq_);
>  void netdev_afxdp_rxq_destruct(struct netdev_rxq *rxq_);
> @@ -70,6 +71,8 @@ int netdev_afxdp_get_custom_stats(const struct netdev 
> *netdev,
>  void free_afxdp_buf(struct dp_packet *p);
>  int netdev_afxdp_reconfigure(struct netdev *netdev);
>  void signal_remove_xdp(struct netdev *netdev);
> +bool has_xdp_flowtable(struct netdev *netdev);
> +struct bpf_object *get_xdp_object(struct netdev *netdev);
>
>  #else /* !HAVE_AF_XDP */
>
Thanks
William
_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to