Thanks for the patch. My comments below:

On Mon, Jun 29, 2020 at 8:30 AM Toshiaki Makita
<toshiaki.maki...@gmail.com> wrote:
>
> This provider offloads classifier to software XDP.
>
> It works only when a custom XDP object is loaded by afxdp netdev.
> The BPF program needs to implement classifier with array-of-maps for
> subtable hashmaps and arraymap for subtable masks. The flow api
> provider detects classifier support in the custom XDP program when
> loading it.
>
> The requirements for the BPF program is as below:
>
> - A struct named "meta_info" in ".ovs_meta" section
>   inform vswitchd of supported keys, actions, and the max number of
>   actions.
>
> - A map named "subtbl_masks"
>   An array which implements a list. The entry contains struct
>   xdp_subtables_mask provided by lib/netdev-offload-xdp.h followed by
>   miniflow buf of any length for the mask.
>
> - A map named "subtbl_masks_hd"
>   A one entry int array which contains the first entry index of the list
>   implemented by subtbl_masks.
>
> - A map named "flow_table"
>   An array-of-maps. Each entry points to subtable hash-map instantiated
>   with the following "subtbl_template".
>   The entry of each index of subtbl_masks has miniflow mask of subtable
>   in the corresponding index of flow_table.
>
> - A map named "subtbl_template"
>   A template for subtable, used for entries of "flow_table".
>   The key must be miniflow buf (without leading map).
>
> - A map named "output_map"
>   A devmap. Each entry represents odp port.
>
> - A map named "xsks_map"
>   A xskmap. Used for upcall.

Maybe later on, we can add the above to Documentation/ and
explain the purpose of these maps there.

>
> For more details, refer to the reference BPF program in next commit.
>
> In the future it may be possible to offload classifier to SmartNIC
> through XDP, but currently it's not because map-in-map is not supported
> by XDP hw-offload.

I think it will be hard, given that only Netronome supports XDP offload today.

>
> Signed-off-by: Toshiaki Makita <toshiaki.maki...@gmail.com>
> ---
>  lib/automake.mk               |    6 +-
>  lib/bpf-util.c                |   38 +
>  lib/bpf-util.h                |   22 +
>  lib/netdev-afxdp.c            |  205 ++++-
>  lib/netdev-afxdp.h            |    3 +
>  lib/netdev-linux-private.h    |    2 +
>  lib/netdev-offload-provider.h |    6 +
>  lib/netdev-offload-xdp.c      | 1315 +++++++++++++++++++++++++++++++++
>  lib/netdev-offload-xdp.h      |   49 ++
>  lib/netdev-offload.c          |    6 +
>  10 files changed, 1650 insertions(+), 2 deletions(-)
>  create mode 100644 lib/bpf-util.c
>  create mode 100644 lib/bpf-util.h
>  create mode 100644 lib/netdev-offload-xdp.c
>  create mode 100644 lib/netdev-offload-xdp.h
>
> diff --git a/lib/automake.mk b/lib/automake.mk
> index 86940ccd2..1fa1371f3 100644
> --- a/lib/automake.mk
> +++ b/lib/automake.mk
> @@ -421,10 +421,14 @@ endif
>
>  if HAVE_AF_XDP
>  lib_libopenvswitch_la_SOURCES += \
> +       lib/bpf-util.c \
> +       lib/bpf-util.h \
>         lib/netdev-afxdp-pool.c \
>         lib/netdev-afxdp-pool.h \
>         lib/netdev-afxdp.c \
> -       lib/netdev-afxdp.h
> +       lib/netdev-afxdp.h \
> +       lib/netdev-offload-xdp.c \
> +       lib/netdev-offload-xdp.h
>  endif

How about doing something like:
--enable-afxdp: the current one on master without the xdp offload program
--enable-afxdp-with-bpf: the afxdp one plus your xdp offload program

So that when users only --enable-afxdp, it doesn't need to compile in the
lib/netdev-offload-xdp.* and bpf/*

>
>  if DPDK_NETDEV
> diff --git a/lib/bpf-util.c b/lib/bpf-util.c
> new file mode 100644
> index 000000000..324cfbe1d
> --- /dev/null
> +++ b/lib/bpf-util.c
> @@ -0,0 +1,38 @@
> +/*
> + * Copyright (c) 2020 NTT Corp.
> + *
> + * Licensed under the Apache License, Version 2.0 (the "License");
> + * you may not use this file except in compliance with the License.
> + * You may obtain a copy of the License at:
> + *
> + *     http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +
> +#include <config.h>
> +
> +#include "bpf-util.h"
> +
> +#include <bpf/libbpf.h>
> +
> +#include "ovs-thread.h"
> +
> +DEFINE_STATIC_PER_THREAD_DATA(struct { char s[128]; },
> +                              libbpf_strerror_buffer,
> +                              { "" });
> +
> +const char *
> +ovs_libbpf_strerror(int err)
> +{
> +    enum { BUFSIZE = sizeof libbpf_strerror_buffer_get()->s };
just curious:
what's the benefit of using enum {BUFSIZE ...} here?


> +    char *buf = libbpf_strerror_buffer_get()->s;
> +
> +    libbpf_strerror(err, buf, BUFSIZE);
> +
> +    return buf;
> +}
> diff --git a/lib/bpf-util.h b/lib/bpf-util.h
> new file mode 100644
> index 000000000..6346935b3
> --- /dev/null
> +++ b/lib/bpf-util.h
> @@ -0,0 +1,22 @@
> +/*
> + * Copyright (c) 2020 NTT Corp.
> + *
> + * Licensed under the Apache License, Version 2.0 (the "License");
> + * you may not use this file except in compliance with the License.
> + * You may obtain a copy of the License at:
> + *
> + *     http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +
> +#ifndef BPF_UTIL_H
> +#define BPF_UTIL_H 1
> +
> +const char *ovs_libbpf_strerror(int err);
> +
> +#endif /* bpf-util.h */
> diff --git a/lib/netdev-afxdp.c b/lib/netdev-afxdp.c
> index 2b2b811e2..9229ae1b0 100644
> --- a/lib/netdev-afxdp.c
> +++ b/lib/netdev-afxdp.c
> @@ -22,6 +22,7 @@
>  #include "netdev-afxdp-pool.h"
>
>  #include <bpf/bpf.h>
> +#include <bpf/btf.h>
>  #include <errno.h>
>  #include <inttypes.h>
>  #include <linux/rtnetlink.h>
> @@ -37,10 +38,13 @@
>  #include <sys/types.h>
>  #include <unistd.h>
>
> +#include "bpf-util.h"
>  #include "coverage.h"
>  #include "dp-packet.h"
>  #include "dpif-netdev.h"
>  #include "fatal-signal.h"
> +#include "netdev-offload-provider.h"
> +#include "netdev-offload-xdp.h"
>  #include "openvswitch/compiler.h"
>  #include "openvswitch/dynamic-string.h"
>  #include "openvswitch/list.h"
> @@ -261,10 +265,203 @@ netdev_afxdp_sweep_unused_pools(void *aux OVS_UNUSED)
>      ovs_mutex_unlock(&unused_pools_mutex);
>  }
>
> +bool
> +has_xdp_flowtable(struct netdev *netdev)
> +{
> +    struct netdev_linux *dev = netdev_linux_cast(netdev);
> +
> +    return dev->has_xdp_flowtable;
> +}
> +
> +struct bpf_object *
> +get_xdp_object(struct netdev *netdev)
> +{
> +    struct netdev_linux *dev = netdev_linux_cast(netdev);
> +
> +    return dev->xdp_obj;
> +}
> +
> +static int
> +xdp_preload(struct netdev *netdev, struct bpf_object *obj)
> +{
> +    static struct ovsthread_once output_map_once = 
> OVSTHREAD_ONCE_INITIALIZER;
> +    struct btf *btf;
> +    struct bpf_map *flow_table, *subtbl_template, *subtbl_masks_hd, 
> *xsks_map,
> +                   *output_map;
> +    const struct bpf_map_def *flow_table_def, *subtbl_def;
> +    int flow_table_fd, subtbl_meta_fd, xsks_map_fd;
> +    static int output_map_fd = -1;
> +    int n_rxq, err;
> +
> +    btf = bpf_object__btf(obj);
> +    if (!btf) {
> +        VLOG_DBG("BPF object for netdev \"%s\" does not contain BTF",
> +                 netdev_get_name(netdev));
> +        return EOPNOTSUPP;
> +    }
> +    if (btf__find_by_name_kind(btf, ".ovs_meta", BTF_KIND_DATASEC) < 0) {
> +        VLOG_DBG("\".ovs_meta\" datasec not found in BTF");
> +        return EOPNOTSUPP;
> +    }
> +
> +    flow_table = bpf_object__find_map_by_name(obj, "flow_table");
> +    if (!flow_table) {
> +        VLOG_DBG("%s: \"flow_table\" map does not exist",
> +                 netdev_get_name(netdev));
> +        return EOPNOTSUPP;
> +    }
> +
> +    subtbl_template = bpf_object__find_map_by_name(obj, "subtbl_template");
> +    if (!subtbl_template) {
> +        VLOG_DBG("%s: \"subtbl_template\" map does not exist",
> +                 netdev_get_name(netdev));
> +        return EOPNOTSUPP;
> +    }
> +
> +    output_map = bpf_object__find_map_by_name(obj, "output_map");
> +    if (!output_map) {
> +        VLOG_DBG("%s: \"output_map\" map does not exist",
> +                 netdev_get_name(netdev));
> +        return EOPNOTSUPP;
> +    }
> +
> +    if (ovsthread_once_start(&output_map_once)) {
> +        /* XXX: close output_map_fd somewhere? */
maybe at uninit_flow_api, we have to check whether there is
still a netdev using linux_xdp offload. And if not, close this map?

> +        output_map_fd = bpf_create_map_name(BPF_MAP_TYPE_DEVMAP, 
> "output_map",
> +                                            sizeof(int), sizeof(int),
> +                                            XDP_MAX_PORTS, 0);
> +        if (output_map_fd < 0) {
> +            err = errno;
> +            VLOG_WARN("%s: Map creation for output_map failed: %s",
> +                      netdev_get_name(netdev), ovs_strerror(errno));
> +            ovsthread_once_done(&output_map_once);
> +            return err;
> +        }
> +        ovsthread_once_done(&output_map_once);
> +    }
> +
> +    flow_table_def = bpf_map__def(flow_table);
> +    if (flow_table_def->type != BPF_MAP_TYPE_ARRAY_OF_MAPS) {
> +        VLOG_WARN("%s: \"flow_table\" map type is not map-in-map array.",
> +                  netdev_get_name(netdev));
> +        return EINVAL;
> +    }
> +
> +    subtbl_def = bpf_map__def(subtbl_template);
> +    if (subtbl_def->type != BPF_MAP_TYPE_HASH) {
> +        VLOG_WARN("%s: \"subtbl_templates\" map type is not hash-table",
> +                  netdev_get_name(netdev));
> +        return EINVAL;
> +    }
> +
> +    subtbl_meta_fd = bpf_create_map(BPF_MAP_TYPE_HASH, subtbl_def->key_size,
> +                                    subtbl_def->value_size,
> +                                    subtbl_def->max_entries, 0);
> +    if (subtbl_meta_fd < 0) {
> +        err = errno;
> +        VLOG_WARN("%s: Map creation for flow_table meta table failed: %s",
> +                  netdev_get_name(netdev), ovs_strerror(errno));
> +        return err;
> +    }
> +    flow_table_fd = bpf_create_map_in_map(BPF_MAP_TYPE_ARRAY_OF_MAPS,
> +                                          "flow_table", sizeof(uint32_t),
> +                                          subtbl_meta_fd,
> +                                          flow_table_def->max_entries,
> +                                          0);
> +    if (flow_table_fd < 0) {
> +        err = errno;
> +        VLOG_WARN("%s: Map creation for flow_table failed: %s",
> +                  netdev_get_name(netdev), ovs_strerror(errno));
> +        close(subtbl_meta_fd);
> +        return err;
> +    }
> +    close(subtbl_meta_fd);
> +
> +    err = bpf_map__reuse_fd(flow_table, flow_table_fd);
> +    if (err) {
> +        VLOG_WARN("%s: Failed to reuse flow_table fd: %s",
> +                  netdev_get_name(netdev), ovs_libbpf_strerror(err));
> +        close(flow_table_fd);
> +        return EINVAL;
> +    }
> +    close(flow_table_fd);
> +
> +    subtbl_masks_hd = bpf_object__find_map_by_name(obj, "subtbl_masks_hd");
> +    if (!subtbl_masks_hd) {
> +        /* Head index can be a global variable. In that case libbpf will
> +         * initialize it. */
> +        VLOG_DBG("%s: \"subtbl_masks_hd\" map does not exist",
> +                 netdev_get_name(netdev));
> +    } else {
> +        int head_fd, head = XDP_SUBTABLES_TAIL, zero = 0;
> +
> +        head_fd = bpf_create_map_name(BPF_MAP_TYPE_ARRAY, "subtbl_masks_hd",
> +                                      sizeof(uint32_t), sizeof(int), 1, 0);
> +        if (head_fd < 0) {
> +            err = errno;
> +            VLOG_ERR("%s: Map creation of \"subtbl_masks_hd\" failed: %s",
> +                     netdev_get_name(netdev), ovs_strerror(errno));
> +            return err;
> +        }
> +
> +        if (bpf_map_update_elem(head_fd, &zero, &head, 0)) {
> +            err = errno;
> +            VLOG_ERR("Cannot update subtbl_masks_hd: %s",
> +                     ovs_strerror(errno));
> +            return err;
> +        }
> +
> +        err = bpf_map__reuse_fd(subtbl_masks_hd, head_fd);
> +        if (err) {
> +            VLOG_ERR("%s: Failed to reuse subtbl_masks_hd fd: %s",
> +                     netdev_get_name(netdev), ovs_libbpf_strerror(err));
> +            close(head_fd);
> +            return EINVAL;
> +        }
> +        close(head_fd);
> +    }
> +
> +    xsks_map = bpf_object__find_map_by_name(obj, "xsks_map");
> +    if (!xsks_map) {
> +        VLOG_ERR("%s: BUG: \"xsks_map\" map does not exist",
> +                 netdev_get_name(netdev));
> +        return EOPNOTSUPP;
> +    }
> +
> +    n_rxq = netdev_n_rxq(netdev);
> +    xsks_map_fd = bpf_create_map_name(BPF_MAP_TYPE_XSKMAP, "xsks_map",
> +                                      sizeof(int), sizeof(int), n_rxq, 0);
> +    if (xsks_map_fd < 0) {
> +        err = errno;
> +        VLOG_WARN("%s: Map creation for xsks_map failed: %s",
> +                  netdev_get_name(netdev), ovs_strerror(errno));
> +        return err;
> +    }
> +
> +    err = bpf_map__reuse_fd(xsks_map, xsks_map_fd);
> +    if (err) {
> +        VLOG_WARN("%s: Failed to reuse xsks_map fd: %s",
> +                  netdev_get_name(netdev), ovs_libbpf_strerror(err));
> +        close(xsks_map_fd);
> +        return EINVAL;
> +    }
> +    close(xsks_map_fd);
> +
> +    err = bpf_map__reuse_fd(output_map, output_map_fd);
> +    if (err) {
> +        VLOG_WARN("%s: Failed to reuse output_map fd: %s",
> +                  netdev_get_name(netdev), ovs_libbpf_strerror(err));
> +        return EINVAL;
> +    }
> +
> +    return 0;
> +}
> +
>  static int
>  xsk_load_prog(struct netdev *netdev, const char *path, struct bpf_object 
> **pobj,
>                int *prog_fd)
>  {
> +    struct netdev_linux *dev = netdev_linux_cast(netdev);
>      struct bpf_object_open_attr attr = {
>          .prog_type = BPF_PROG_TYPE_XDP,
>          .file = path,
> @@ -298,6 +495,12 @@ xsk_load_prog(struct netdev *netdev, const char *path, 
> struct bpf_object **pobj,
>          goto err;
>      }
>
> +    if (!xdp_preload(netdev, obj)) {
> +        VLOG_INFO("%s: Detected flowtable support in XDP program",
> +                  netdev_get_name(netdev));
> +        dev->has_xdp_flowtable = true;
> +    }
> +
>      if (bpf_object__load(obj)) {
>          VLOG_ERR("%s: Can't load XDP program at '%s'",
>                   netdev_get_name(netdev), path);
> @@ -1297,7 +1500,7 @@ libbpf_print(enum libbpf_print_level level,
>  int netdev_afxdp_init(void)
>  {
>      libbpf_set_print(libbpf_print);
> -    return 0;
> +    return netdev_register_flow_api_provider(&netdev_offload_xdp);
maybe use
ovsthread_once_start(&once) to make sure it is registered only once.

>  }
>
>  int
> diff --git a/lib/netdev-afxdp.h b/lib/netdev-afxdp.h
> index e91cd102d..324152e8f 100644
> --- a/lib/netdev-afxdp.h
> +++ b/lib/netdev-afxdp.h
> @@ -44,6 +44,7 @@ struct netdev_stats;
>  struct smap;
>  struct xdp_umem;
>  struct xsk_socket_info;
> +struct bpf_object;
>
>  int netdev_afxdp_rxq_construct(struct netdev_rxq *rxq_);
>  void netdev_afxdp_rxq_destruct(struct netdev_rxq *rxq_);
> @@ -70,6 +71,8 @@ int netdev_afxdp_get_custom_stats(const struct netdev 
> *netdev,
>  void free_afxdp_buf(struct dp_packet *p);
>  int netdev_afxdp_reconfigure(struct netdev *netdev);
>  void signal_remove_xdp(struct netdev *netdev);
> +bool has_xdp_flowtable(struct netdev *netdev);
> +struct bpf_object *get_xdp_object(struct netdev *netdev);
>
>  #else /* !HAVE_AF_XDP */
>
> diff --git a/lib/netdev-linux-private.h b/lib/netdev-linux-private.h
> index 0f8823c45..876510cb7 100644
> --- a/lib/netdev-linux-private.h
> +++ b/lib/netdev-linux-private.h
> @@ -121,6 +121,8 @@ struct netdev_linux {
>      const char *xdp_obj_path;         /* XDP object file path. */
>      const char *requested_xdp_obj;
>      struct bpf_object *xdp_obj;
> +
> +    bool has_xdp_flowtable;
>  #endif
>  };
>
> diff --git a/lib/netdev-offload-provider.h b/lib/netdev-offload-provider.h
> index 0bed7bf61..7147a5166 100644
> --- a/lib/netdev-offload-provider.h
> +++ b/lib/netdev-offload-provider.h
> @@ -86,6 +86,9 @@ struct netdev_flow_api {
>      /* Initializies the netdev flow api.
typo: initializes
>      * Return 0 if successful, otherwise returns a positive errno value. */
>      int (*init_flow_api)(struct netdev *);
> +
> +    /* Uninitialize the netdev flow api. */
> +    void (*uninit_flow_api)(struct netdev *);
>  };
>
>  int netdev_register_flow_api_provider(const struct netdev_flow_api *);
> @@ -94,6 +97,9 @@ bool netdev_flow_api_equals(const struct netdev *, const 
> struct netdev *);
>
>  #ifdef __linux__
>  extern const struct netdev_flow_api netdev_offload_tc;
> +#ifdef HAVE_AF_XDP
> +extern const struct netdev_flow_api netdev_offload_xdp;
> +#endif
>  #endif
>
>  #ifdef DPDK_NETDEV
> diff --git a/lib/netdev-offload-xdp.c b/lib/netdev-offload-xdp.c
> new file mode 100644
> index 000000000..53eb24557
> --- /dev/null
> +++ b/lib/netdev-offload-xdp.c
> @@ -0,0 +1,1315 @@
> +/*
> + * Copyright (c) 2020 NTT Corp.
> + *
> + * Licensed under the Apache License, Version 2.0 (the "License");
> + * you may not use this file except in compliance with the License.
> + * You may obtain a copy of the License at:
> + *
> + *     http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +
> +#include <config.h>
> +
> +#include "netdev-offload-xdp.h"
> +
> +#include <bpf/bpf.h>
> +#include <bpf/btf.h>
> +#include <bpf/libbpf.h>
> +#include <errno.h>
> +#include <unistd.h>
> +
> +#include "bpf-util.h"
> +#include "dpif.h"
> +#include "hash.h"
> +#include "netdev.h"
> +#include "netdev-offload-provider.h"
> +#include "netlink.h"
> +#include "odp-netlink.h"
> +#include "openvswitch/hmap.h"
> +#include "openvswitch/match.h"
> +#include "openvswitch/vlog.h"
> +
> +VLOG_DEFINE_THIS_MODULE(netdev_offload_xdp);
> +static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(100, 5);
> +
> +static struct hmap ufid_to_xdp = HMAP_INITIALIZER(&ufid_to_xdp);
> +
> +static struct ovs_mutex ufid_lock = OVS_MUTEX_INITIALIZER;
> +
> +struct ufid_to_xdp_data {
> +    struct hmap_node ufid_node;
> +    ovs_u128 ufid;
> +    struct minimask mask;
> +    uint64_t mask_buf[FLOW_MAX_PACKET_U64S];
> +    struct miniflow flow;
> +    uint64_t flow_buf[FLOW_MAX_PACKET_U64S];
> +};
> +
> +static struct hmap netdev_info_table = HMAP_INITIALIZER(&netdev_info_table);
> +
> +struct netdev_info {
the "netdev_info" makes me think this is something generic to netdev.
But this is actually XDP-specific.
How about changing the name to netdev_xdp_info?

> +    struct hmap_node port_node;
> +    struct netdev *netdev;
> +    odp_port_t port;
> +    uint32_t devmap_idx;
> +    struct miniflow supported_keys;
> +    uint64_t supported_keys_buf[FLOW_MAX_PACKET_U64S];
> +    uint32_t supported_actions;
> +    uint32_t max_subtables;
> +    uint32_t subtable_mask_size;
> +    uint32_t key_size;
> +    uint32_t max_actions;
> +    uint32_t max_actions_len;
> +    uint32_t max_entries;
> +    int free_slot_top;
> +    int free_slots[XDP_MAX_SUBTABLES];
> +};
> +
> +static struct hmap devmap_idx_table = HMAP_INITIALIZER(&devmap_idx_table);
> +
> +struct devmap_idx_data {
> +    struct hmap_node node;
> +    int devmap_idx;
> +};
> +
> +
> +/* Free entry managemant for list implementation using array */
typo: management

I see you're maintaining a resource pool containing free slot for subtable.
I'm wondering if lib/id-pool.c can be used here?

> +
> +static void
> +init_subtbl_masks_free_slot(struct netdev_info *netdev_info)
> +{
> +    int i;
> +    int max_subtables = netdev_info->max_subtables;
> +
> +    for (i = 0; i < max_subtables; i++) {
> +        netdev_info->free_slots[max_subtables - 1 - i] = i;
> +    }
> +    netdev_info->free_slot_top = max_subtables - 1;
> +}
> +
> +static int
> +get_subtbl_masks_free_slot(const struct netdev_info *netdev_info, int *slot)
> +{
> +    if (netdev_info->free_slot_top < 0) {
> +        return ENOBUFS;
> +    }
> +
> +    *slot = netdev_info->free_slots[netdev_info->free_slot_top];
> +    return 0;
> +}
> +
> +static int
> +add_subtbl_masks_free_slot(struct netdev_info *netdev_info, int slot)
> +{
> +    if (netdev_info->free_slot_top >= netdev_info->max_subtables - 1) {
> +        VLOG_ERR_RL(&rl, "BUG: free_slot overflow: top=%d, slot=%d",
> +                    netdev_info->free_slot_top, slot);
> +        return EOVERFLOW;
> +    }
> +
> +    netdev_info->free_slots[++netdev_info->free_slot_top] = slot;
> +    return 0;
> +}
> +
> +static void
> +delete_subtbl_masks_free_slot(struct netdev_info *netdev_info, int slot)
> +{
> +    int top_slot;
> +
> +    if (netdev_info->free_slot_top < 0) {
> +        VLOG_ERR_RL(&rl, "BUG: free_slot underflow: top=%d, slot=%d",
> +                    netdev_info->free_slot_top, slot);
> +        return;
> +    }
> +
> +    top_slot = netdev_info->free_slots[netdev_info->free_slot_top];
> +    if (top_slot != slot) {
> +        VLOG_ERR_RL(&rl,
> +                    "BUG: inconsistent free_slot top: top_slot=%d, slot=%d",
> +                    top_slot, slot);
> +        return;
> +    }
> +
> +    netdev_info->free_slot_top--;
> +}
> +
> +
> +#define FLOW_MASK_FIELD(MASK, FIELD) \
> +    memset(&(MASK).FIELD, 0xff, sizeof (MASK).FIELD)
> +
> +static int
> +probe_supported_keys(struct netdev_info *netdev_info, struct btf *btf,
> +                     uint32_t type)
> +{
> +    struct miniflow *mf = &netdev_info->supported_keys;
> +    struct flowmap *map = &mf->map;
> +    struct flow mask;
> +    struct btf_member *m;
> +    const struct btf_type *pt, *t;
> +    int i;
> +
> +    pt = btf__type_by_id(btf, type);
> +    if (!pt) {
> +        VLOG_ERR("\"supported_keys\" field type is unknown");
> +        return EINVAL;
> +    }
> +    if (!btf_is_ptr(pt)) {
> +        VLOG_ERR("\"supported_keys\" field is not ptr");
> +        return EINVAL;
> +    }
> +    t = btf__type_by_id(btf, pt->type);
> +    if (!t) {
> +        VLOG_ERR("\"supported_keys\" ptr type is unknown");
> +        return EINVAL;
> +    }
> +    if (!btf_is_struct(t)) {
> +        VLOG_ERR("\"supported_keys\" field is not struct ptr");
> +        return EINVAL;
> +    }
> +
> +    memset(&mask, 0, sizeof mask);
> +    flowmap_init(map);
> +    for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) {
> +        const char *name = btf__name_by_offset(btf, m->name_off);
> +
> +        if (!name) {
> +            VLOG_ERR("Unnamed field #%d in \"supported_keys\" struct", i);
> +            return EINVAL;
> +        }
> +        if (!strcmp(name, "dl_dst")) {
> +            FLOWMAP_SET(map, dl_dst);
> +            FLOW_MASK_FIELD(mask, dl_dst);
> +        } else if (!strcmp(name, "dl_src")) {
> +            FLOWMAP_SET(map, dl_src);
> +            FLOW_MASK_FIELD(mask, dl_src);
> +        } else if (!strcmp(name, "dl_type")) {
> +            FLOWMAP_SET(map, dl_type);
> +            FLOW_MASK_FIELD(mask, dl_type);
> +        } else if (!strcmp(name, "vlans")) {
> +            const struct btf_type *vt;
> +            const struct btf_array *arr;
> +
> +            FLOWMAP_SET(map, vlans);
> +            vt = btf__type_by_id(btf, m->type);
> +            if (!vt) {
> +                VLOG_ERR("\"vlans\" field type is unknown");
> +                return EINVAL;
> +            }
> +            if (!btf_is_array(vt)) {
> +                VLOG_ERR("\"vlans\" field is not array");
> +                return EINVAL;
> +            }
> +            arr = btf_array(vt);
> +            if (arr->nelems > 2) {
> +                VLOG_ERR("\"vlans\" elems too many: %u", arr->nelems);
> +                return EINVAL;
> +            }
> +            memset(&mask.vlans, 0xff, sizeof mask.vlans[0] * arr->nelems);
> +        } else if (!strcmp(name, "nw_src")) {
> +            FLOWMAP_SET(map, nw_src);
> +            FLOW_MASK_FIELD(mask, nw_src);
> +        } else if (!strcmp(name, "nw_dst")) {
> +            FLOWMAP_SET(map, nw_dst);
> +            FLOW_MASK_FIELD(mask, nw_dst);
> +        } else if (!strcmp(name, "nw_frag")) {
> +            FLOWMAP_SET(map, nw_frag);
> +            FLOW_MASK_FIELD(mask, nw_frag);
> +        } else if (!strcmp(name, "nw_tos")) {
> +            FLOWMAP_SET(map, nw_tos);
> +            FLOW_MASK_FIELD(mask, nw_tos);
> +        } else if (!strcmp(name, "nw_ttl")) {
> +            FLOWMAP_SET(map, nw_ttl);
> +            FLOW_MASK_FIELD(mask, nw_ttl);
> +        } else if (!strcmp(name, "nw_proto")) {
> +            FLOWMAP_SET(map, nw_proto);
> +            FLOW_MASK_FIELD(mask, nw_proto);
> +        } else if (!strcmp(name, "tp_src")) {
> +            FLOWMAP_SET(map, tp_src);
> +            FLOW_MASK_FIELD(mask, tp_src);
> +        } else if (!strcmp(name, "tp_dst")) {
> +            FLOWMAP_SET(map, tp_dst);
> +            FLOW_MASK_FIELD(mask, tp_dst);
> +        } else if (strncmp(name, "pad", 3)) {
> +            VLOG_ERR("Unsupported flow key %s", name);
> +            return EOPNOTSUPP;
> +        }
> +    }
> +
> +    miniflow_init(mf, &mask);
> +
> +    return 0;
> +}
> +
> +static bool
> +is_supported_keys(struct netdev_info *netdev_info, const struct minimask 
> *mask)
> +{
> +    const struct miniflow *mf = &mask->masks;
> +    const uint64_t *p = miniflow_get_values(mf);
> +    size_t idx;
> +
> +    FLOWMAP_FOR_EACH_INDEX(idx, mf->map) {
> +        uint64_t supported = miniflow_get(&netdev_info->supported_keys, idx);
> +        if (~supported & *p) {
> +            VLOG_DBG("Unsupported key: Index=%lu, Supported=%lx, Mask=%lx",
> +                     idx, supported, *p);
> +            return false;
> +        }
> +        p++;
> +    }
> +    return true;
> +}
> +
> +static int
> +probe_supported_actions(struct netdev_info *netdev_info, struct btf *btf,
> +                        uint32_t type)
> +{
> +    const struct btf_type *pt, *t;
> +    const struct btf_enum *v;
> +    int i;
> +
> +    pt = btf__type_by_id(btf, type);
> +    if (!pt) {
> +        VLOG_ERR("\"supported_actions\" field type is unknown");
> +        return EINVAL;
> +    }
> +    if (!btf_is_ptr(pt)) {
> +        VLOG_ERR("\"supported_actions\" field is not ptr");
> +        return EINVAL;
> +    }
> +    t = btf__type_by_id(btf, pt->type);
> +    if (!t) {
> +        VLOG_ERR("\"supported_actions\" ptr type is unknown");
> +        return EINVAL;
> +    }
> +    if (!btf_is_enum(t)) {
> +        VLOG_ERR("\"supported_actions\" field is not enum ptr");
> +        return EINVAL;
> +    }
> +
> +    netdev_info->supported_actions = 0;
> +    v = btf_enum(t);
> +    for (i = 0; i < btf_vlen(t); i++) {
> +        const char *name = btf__name_by_offset(btf, v[i].name_off);
> +
> +        if (!name) {
> +            VLOG_ERR("Unnamed field #%d in \"supported_actions\" enum", i);
> +            return EINVAL;
> +        }
> +        switch (v[i].val) {
> +        case OVS_ACTION_ATTR_OUTPUT:
> +        case OVS_ACTION_ATTR_PUSH_VLAN:
> +        case OVS_ACTION_ATTR_POP_VLAN:
> +            netdev_info->supported_actions |= (1 << v[i].val);
> +            break;
> +        default:
> +            VLOG_ERR("Action \"%s\" (%d) is not supported",
> +                     name, v[i].val);
> +            return EOPNOTSUPP;
> +        }
> +    }
> +
> +    return 0;
> +}
> +
> +static bool
> +is_supported_actions(struct netdev_info *netdev_info,
> +                     const struct nlattr *actions, size_t actions_len)
> +{
> +    const struct nlattr *a;
> +    unsigned int left;
> +    int actions_num = 0;
> +
> +    NL_ATTR_FOR_EACH_UNSAFE(a, left, actions, actions_len) {
> +        int type = nl_attr_type(a);
> +
> +        if (!(netdev_info->supported_actions & (1 << type))) {
> +            VLOG_DBG("Unsupported action: %d", type);
> +            return false;
> +        }
> +        actions_num++;
> +    }
> +
> +    if (actions_num > netdev_info->max_actions) {
> +        VLOG_DBG("Too many actions: %d", actions_num);
> +        return false;
> +    }
> +    return true;
> +}
> +
> +static int
> +probe_max_actions(struct netdev_info *netdev_info, struct btf *btf,
> +                  uint32_t type)
> +{
> +    const struct btf_type *pt, *at;
> +    const struct btf_array *arr;
> +
> +    pt = btf__type_by_id(btf, type);
> +    if (!pt) {
> +        VLOG_ERR("\"max_actions\" field type is unknown");
> +        return EINVAL;
> +    }
> +    if (!btf_is_ptr(pt)) {
> +        VLOG_ERR("\"max_actions\" field is not ptr");
> +        return EINVAL;
> +    }
> +    at = btf__type_by_id(btf, pt->type);
> +    if (!at) {
> +        VLOG_ERR("\"max_actions\" ptr type is unknown");
> +        return EINVAL;
> +    }
> +    if (!btf_is_array(at)) {
> +        VLOG_ERR("\"max_actions\" field is not array ptr");
> +        return EINVAL;
> +    }
> +    arr = btf_array(at);
> +    netdev_info->max_actions = arr->nelems;
> +
> +    return 0;
> +}
> +
> +static int
> +probe_meta_info(struct netdev_info *netdev_info, struct btf *btf)
> +{
> +    int32_t meta_sec_id;
> +    struct btf_var_secinfo *vi;
> +    struct btf_member *m;
> +    const struct btf_type *sec, *t = NULL;
> +    bool supported_keys_found = false;
> +    int i;
> +
> +    meta_sec_id = btf__find_by_name_kind(btf, ".ovs_meta", BTF_KIND_DATASEC);
> +    if (meta_sec_id < 0) {
> +        VLOG_ERR("BUG: \".ovs_meta\" datasec not found in BTF");
> +        return EINVAL;
> +    }
> +
> +    sec = btf__type_by_id(btf, meta_sec_id);
> +    for (i = 0, vi = btf_var_secinfos(sec); i < btf_vlen(sec); i++, vi++) {
> +        const struct btf_type *var = btf__type_by_id(btf, vi->type);
> +        const char *name;
> +
> +        if (!var) {
> +            VLOG_ERR("\".ovs_meta\" var #%d type is unknown", i);
> +            return EINVAL;
> +        }
> +        name = btf__name_by_offset(btf, var->name_off);
> +        if (!name) {
> +            VLOG_ERR("\".ovs_meta\" var #%d name is empty", i);
> +            return EINVAL;
> +        }
> +        if (strcmp(name, "meta_info")) {
> +            continue;
> +        }
> +        if (!btf_is_var(var)) {
> +            VLOG_ERR("\"meta_info\" is not var");
> +            return EINVAL;
> +        }
> +        t = btf__type_by_id(btf, var->type);
> +        if (!t) {
> +            VLOG_ERR("\"meta_info\" var type is unknown");
> +            return EINVAL;
> +        }
> +        break;
> +    }
> +
> +    if (!t) {
> +        VLOG_ERR("\"meta_info\" var not found in \".ovs_meta\" datasec");
> +        return EINVAL;
> +    }
> +
> +    if (!btf_is_struct(t)) {
> +        VLOG_ERR("\"meta_info\" is not struct");
> +        return EINVAL;
> +    }
> +
> +    for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) {
> +        const char *name = btf__name_by_offset(btf, m->name_off);
> +        int err;
> +
> +        if (!name) {
> +            VLOG_ERR("Invalid field #%d in \"meta_info\" struct", i);
> +            return EINVAL;
> +        }
> +        if (!strcmp(name, "supported_keys")) {
> +            err = probe_supported_keys(netdev_info, btf, m->type);
> +            if (err) {
> +                return err;
> +            }
> +            supported_keys_found = true;
> +        } else if (!strcmp(name, "supported_actions")) {
> +            err = probe_supported_actions(netdev_info, btf, m->type);
> +            if (err) {
> +                return err;
> +            }
> +        } else if (!strcmp(name, "max_actions")) {
> +            err = probe_max_actions(netdev_info, btf, m->type);
> +            if (err) {
> +                return err;
> +            }
> +        } else {
> +            VLOG_ERR("Unsupported meta_info key %s", name);
> +            return EOPNOTSUPP;
> +        }
> +    }
> +
> +    if (!supported_keys_found) {
> +        VLOG_ERR("\"supported_keys\" field not found in \"meta_info\"");
> +        return EINVAL;
> +    }
> +    if (!netdev_info->supported_actions) {
> +        VLOG_ERR("\"supported_actions\" field not found in \"meta_info\"");
> +        return EINVAL;
> +    }
> +    if (!netdev_info->max_actions) {
> +        VLOG_ERR("\"max_actions\" field not found in \"meta_info\"");
> +        return EINVAL;
> +    }
> +
> +    return 0;
> +}
> +
> +
> +static struct netdev_info *
> +find_netdev_info(odp_port_t port)
> +{
> +    size_t port_hash = hash_bytes(&port, sizeof port, 0);
> +    struct netdev_info *netdev_info;
> +
> +    HMAP_FOR_EACH_WITH_HASH(netdev_info, port_node, port_hash,
> +                            &netdev_info_table) {
> +        if (port == netdev_info->port) {
> +            return netdev_info;
> +        }
> +    }
> +
> +    return NULL;
> +}
> +
> +static int
> +get_odp_port(struct netdev *netdev, odp_port_t *port)
> +{
> +    int ifindex;
> +
> +    ifindex = netdev_get_ifindex(netdev);
> +    if (ifindex < 0) {
> +        VLOG_ERR_RL(&rl, "Failed to get ifindex for %s: %s",
> +                    netdev_get_name(netdev), ovs_strerror(-ifindex));
> +        return -ifindex;
> +    }
> +
> +    *port = netdev_ifindex_to_odp_port(ifindex);
> +    return 0;
> +}
> +
> +static struct netdev_info *
> +get_netdev_info(struct netdev *netdev)
> +{
> +    struct netdev_info *netdev_info;
> +    odp_port_t port;
> +
> +    if (get_odp_port(netdev, &port)) {
> +        return NULL;
> +    }
> +
> +    netdev_info = find_netdev_info(port);
> +    if (!netdev_info) {
> +        VLOG_ERR_RL(&rl, "Failed to find netdev_info for %s",
> +                    netdev_get_name(netdev));
> +    }
> +
> +    return netdev_info;
> +}
> +
> +
/* Convert odp_port to devmap_idx in output action.
 *
 * Rewrites, in place, the port number of each OVS_ACTION_ATTR_OUTPUT
 * attribute in 'actions' into the devmap index used by the XDP program's
 * "output_map".  Returns 0 on success, EOPNOTSUPP if an output port has no
 * XDP program attached or if any action follows an output (which would
 * require cloning the packet). */
static int
convert_port_to_devmap_idx(struct nlattr *actions, size_t actions_len)
{
    struct nlattr *a;
    unsigned int left;
    bool output_seen = false;

    NL_ATTR_FOR_EACH_UNSAFE(a, left, actions, actions_len) {
        int type = nl_attr_type(a);

        /* Any action after an output would need a copy of the packet,
         * which the XDP datapath cannot make. */
        if (output_seen) {
            VLOG_DBG("XDP does not support packet copy");
            return EOPNOTSUPP;
        }

        if (type == OVS_ACTION_ATTR_OUTPUT) {
            odp_port_t *port;
            struct netdev_info *netdev_info;

            /* CONST_CAST is safe: the caller (netdev_xdp_flow_put) passes
             * a private copy of the actions buffer. */
            port = CONST_CAST(odp_port_t *,
                              nl_attr_get_unspec(a, sizeof(odp_port_t)));
            netdev_info = find_netdev_info(*port);
            if (!netdev_info) {
                VLOG_DBG("Cannot output to port %u without XDP prog attached",
                         *port);
                return EOPNOTSUPP;
            }
            /* XXX: Some NICs cannot handle XDP_REDIRECT'ed packets even with
             * XDP program enabled. Linux netdev community is considering
             * adding feature detection in XDP */

            *port = u32_to_odp(netdev_info->devmap_idx);
            output_seen = true;
        }
    }

    return 0;
}
> +
> +static struct devmap_idx_data *
> +find_devmap_idx(int devmap_idx)
> +{
> +    struct devmap_idx_data *data;
> +    size_t hash = hash_bytes(&devmap_idx, sizeof devmap_idx, 0);
> +
> +    HMAP_FOR_EACH_WITH_HASH(data, node, hash, &devmap_idx_table) {
> +        if (devmap_idx == data->devmap_idx) {
> +            return data;
> +        }
> +    }
> +
> +    return NULL;
> +}
> +
/* Allocates an unused devmap index in [0, XDP_MAX_PORTS), records it in
 * devmap_idx_table, and stores it in '*pidx'.  Returns 0 on success or
 * ENOSPC when every index is in use.
 * NOTE(review): an id-pool could replace this hand-rolled allocator;
 * confirm id-pool gives the same round-robin (delayed-reuse) behavior
 * before switching. */
static int
get_new_devmap_idx(int *pidx)
{
    /* 'static' so the cursor survives across calls: allocation proceeds
     * round-robin from where the previous call stopped, which avoids
     * immediately handing out an index that was just freed. */
    static int max_devmap_idx = 0;
    int offset;

    for (offset = 0; offset < XDP_MAX_PORTS; offset++) {
        int devmap_idx = max_devmap_idx++;

        /* Keep the cursor within [0, XDP_MAX_PORTS). */
        if (max_devmap_idx >= XDP_MAX_PORTS) {
            max_devmap_idx -= XDP_MAX_PORTS;
        }

        if (!find_devmap_idx(devmap_idx)) {
            struct devmap_idx_data *data;
            size_t hash = hash_bytes(&devmap_idx, sizeof devmap_idx, 0);

            data = xzalloc(sizeof *data);
            data->devmap_idx = devmap_idx;
            hmap_insert(&devmap_idx_table, &data->node, hash);

            *pidx = devmap_idx;
            return 0;
        }
    }

    return ENOSPC;
}
> +
> +static void
> +delete_devmap_idx(int devmap_idx)
> +{
> +    struct devmap_idx_data *data = find_devmap_idx(devmap_idx);
> +
> +    if (data) {
> +        hmap_remove(&devmap_idx_table, &data->node);
> +        free(data);
> +    }
> +}
> +
> +
> +static int
> +get_table_fd(const struct bpf_object *obj, const char *table_name,
> +             int *pmap_fd)
> +{
> +    struct bpf_map *map;
> +    int map_fd;
> +
> +    map = bpf_object__find_map_by_name(obj, table_name);
> +    if (!map) {
> +        VLOG_ERR_RL(&rl, "BPF map \"%s\" not found", table_name);
> +        return ENOENT;
> +    }
> +
> +    map_fd = bpf_map__fd(map);
> +    if (map_fd < 0) {
> +        VLOG_ERR_RL(&rl, "Invalid BPF map fd: %s",
> +                    ovs_libbpf_strerror(map_fd));
> +        return EINVAL;
> +    }
> +
> +    *pmap_fd = map_fd;
> +    return 0;
> +}
> +
> +static int
> +get_subtbl_masks_hd_fd(const struct bpf_object *obj, int *head_fd)
> +{
> +    return get_table_fd(obj, "subtbl_masks_hd", head_fd);
> +}
> +
> +static int
> +get_subtbl_masks_hd(int head_fd, int *head)
> +{
> +    int err, zero = 0;
> +
> +    if (bpf_map_lookup_elem(head_fd, &zero, head)) {
> +        err = errno;
> +        VLOG_ERR_RL(&rl, "Cannot get subtbl_masks_hd: %s",
> +                    ovs_strerror(errno));
> +        return err;
> +    }
> +
> +    return 0;
> +}
> +
> +static int
> +update_subtbl_masks_hd(int head_fd, int head)
> +{
> +    int err, zero = 0;
> +
> +    if (bpf_map_update_elem(head_fd, &zero, &head, 0)) {
> +        err = errno;
> +        VLOG_ERR_RL(&rl, "Cannot update subtbl_masks_hd: %s",
> +                    ovs_strerror(errno));
> +        return err;
> +    }
> +
> +    return 0;
> +}
> +
> +static int
> +get_subtbl_masks_fd(const struct bpf_object *obj, int *masks_fd)
> +{
> +    return get_table_fd(obj, "subtbl_masks", masks_fd);
> +}
> +
> +static int
> +get_flow_table_fd(const struct bpf_object *obj, int *tables_fd)
> +{
> +    return get_table_fd(obj, "flow_table", tables_fd);
> +}
> +
> +static int
> +get_output_map_fd(const struct bpf_object *obj, int *output_map_fd)
> +{
> +    return get_table_fd(obj, "output_map", output_map_fd);
> +}
> +
> +
/* Installs a flow into the XDP classifier of 'netdev'.
 *
 * The classifier is two-level: "subtbl_masks" is a BPF array used as a
 * linked list of subtable masks (head index in "subtbl_masks_hd"), and
 * "flow_table" is an array-of-maps whose entry at the same index is the
 * subtable hash map for that mask.  This function searches the mask list
 * for a subtable whose minimask equals the flow's mask; if found, the
 * masked miniflow key and actions are inserted there.  Otherwise a new
 * subtable map is created, linked at a free slot, and the entry inserted
 * into it.  Finally the ufid is recorded in ufid_to_xdp for later lookup
 * on deletion.
 *
 * Returns 0 on success, or a positive errno value on failure. */
static int
netdev_xdp_flow_put(struct netdev *netdev, struct match *match_,
                    struct nlattr *actions, size_t actions_len,
                    const ovs_u128 *ufid, struct offload_info *info OVS_UNUSED,
                    struct dpif_flow_stats *stats OVS_UNUSED)
{
    struct netdev_info *netdev_info;
    struct bpf_object *obj = get_xdp_object(netdev);
    struct minimatch minimatch;
    struct match *match;
    uint32_t key_size;
    size_t fidx;
    uint64_t *flow_u64, *mask_u64, *tmp_values;
    int masks_fd, head_fd, flow_table_fd, subtbl_fd, free_slot, head;
    struct xdp_subtable_mask_header *entry, *pentry;
    struct xdp_flow_actions_header *xdp_actions;
    char subtbl_name[BPF_OBJ_NAME_LEN];
    size_t hash;
    struct ufid_to_xdp_data *data;
    int cnt, idx, pidx;
    int err;

    netdev_info = get_netdev_info(netdev);
    if (!netdev_info) {
        return ENOENT;
    }

    /* Assume only eth packets on packet reception in XDP */
    if (match_->wc.masks.packet_type &&
        match_->flow.packet_type != htonl(PT_ETH)) {
        VLOG_DBG_RL(&rl, "Packet type not ETH");
        return EOPNOTSUPP;
    }

    /* probe_supported_key() does not support recirculation */
    if (match_->wc.masks.recirc_id && match_->flow.recirc_id) {
        VLOG_DBG_RL(&rl, "Recirc id not zero");
        return EOPNOTSUPP;
    }

    match = xmemdup(match_, sizeof *match);
    /* XDP only handles packets with packet_type = 0 and recirc_id = 0 so
     * clear masks to reduce max key size */
    match->wc.masks.packet_type = 0;
    match->wc.masks.recirc_id = 0;
    /* We install per-port XDP classifier table so no need for odp_port */
    match->wc.masks.in_port.odp_port = 0;
    minimatch_init(&minimatch, match);
    free(match);

    key_size = MINIFLOW_VALUES_SIZE(miniflow_n_values(minimatch.flow));
    if (key_size > netdev_info->key_size) {
        err = EOPNOTSUPP;
        VLOG_DBG_RL(&rl, "Key size too big");
        goto err;
    }

    if (sizeof(struct xdp_flow_actions_header) + actions_len >
        netdev_info->max_actions_len) {
        err = EOPNOTSUPP;
        VLOG_DBG_RL(&rl, "Actions size too big");
        goto err;
    }

    /* XDP only uses masked keys so need to mask the key before adding an
     * entry otherwise table miss unexpectedly happens in XDP */
    mask_u64 = miniflow_values(&minimatch.mask->masks);
    flow_u64 = miniflow_values(minimatch.flow);
    FLOWMAP_FOR_EACH_INDEX(fidx, minimatch.mask->masks.map) {
        *flow_u64++ &= *mask_u64++;
    }

    if (!is_supported_keys(netdev_info, minimatch.mask)) {
        err = EOPNOTSUPP;
        VLOG_DBG_RL(&rl, "Key not supported");
        goto err;
    }

    if (!is_supported_actions(netdev_info, actions, actions_len)) {
        err = EOPNOTSUPP;
        VLOG_DBG_RL(&rl, "Actions not supported");
        goto err;
    }

    /* subtables in XDP is hash table whose key is miniflow value and whose
     * value is actions preceded by actions_len */
    xdp_actions = xzalloc(netdev_info->max_actions_len);
    xdp_actions->actions_len = actions_len;
    memcpy(xdp_flow_actions(xdp_actions), actions, actions_len);

    /* TODO: Use XDP_TX for redirect action when possible.
     * NOTE(review): XDP_TX can only send out of the receiving netdev;
     * multiple output ports are not an issue here because
     * convert_port_to_devmap_idx() rejects any action that follows an
     * output. */
    err = convert_port_to_devmap_idx(xdp_flow_actions(xdp_actions),
                                     actions_len);
    if (err) {
        goto err_actions;
    }

    err = get_subtbl_masks_fd(obj, &masks_fd);
    if (err) {
        goto err_actions;
    }

    err = get_subtbl_masks_hd_fd(obj, &head_fd);
    if (err) {
        goto err_actions;
    }

    err = get_subtbl_masks_hd(head_fd, &head);
    if (err) {
        goto err_actions;
    }

    err = get_flow_table_fd(obj, &flow_table_fd);
    if (err) {
        goto err_actions;
    }

    entry = xzalloc(netdev_info->subtable_mask_size);
    pentry = xzalloc(netdev_info->subtable_mask_size);

    /* Iterate subtable mask list implemented using array.
     * The 'next' links chain only the masks currently in use (free array
     * slots are tracked separately by get_subtbl_masks_free_slot()), so
     * the search visits active subtables only instead of scanning every
     * array slot. */
    idx = head;
    for (cnt = 0; cnt < netdev_info->max_subtables; cnt++) {
        if (idx == XDP_SUBTABLES_TAIL) {
            break;
        }

        if (bpf_map_lookup_elem(masks_fd, &idx, entry)) {
            err = errno;
            VLOG_ERR_RL(&rl, "Cannot lookup subtbl_masks: %s",
                        ovs_strerror(errno));
            goto err_entry;
        }

        if (minimask_equal(minimatch.mask, &entry->mask)) {
            __u32 id;

            /* Found an existing subtable with the same mask: insert the
             * flow there and bump the subtable's flow count. */
            if (bpf_map_lookup_elem(flow_table_fd, &idx, &id)) {
                err = errno;
                VLOG_ERR_RL(&rl, "Cannot lookup flow_table: %s",
                            ovs_strerror(errno));
                goto err_entry;
            }

            subtbl_fd = bpf_map_get_fd_by_id(id);
            if (subtbl_fd < 0) {
                err = errno;
                VLOG_ERR_RL(&rl, "Cannot get subtbl fd by id: %s",
                            ovs_strerror(errno));
                goto err_entry;
            }

            /* Pad the key out to the map's fixed key_size. */
            tmp_values = xzalloc(netdev_info->key_size);
            memcpy(tmp_values, miniflow_get_values(minimatch.flow), key_size);
            if (bpf_map_update_elem(subtbl_fd, tmp_values, xdp_actions, 0)) {
                err = errno;
                VLOG_ERR_RL(&rl, "Cannot insert flow entry: %s",
                            ovs_strerror(errno));
                free(tmp_values);
                goto err_close;
            }

            entry->count++;
            if (bpf_map_update_elem(masks_fd, &idx, entry, 0)) {
                err = errno;
                VLOG_ERR_RL(&rl, "Cannot update subtbl_masks count: %s",
                            ovs_strerror(errno));
                bpf_map_delete_elem(subtbl_fd, tmp_values);
                free(tmp_values);
                goto err_close;
            }
            free(tmp_values);

            goto out;
        }

        memcpy(pentry, entry, netdev_info->subtable_mask_size);
        pidx = idx;
        idx = entry->next;
    }

    if (cnt == netdev_info->max_subtables && idx != XDP_SUBTABLES_TAIL) {
        err = EINVAL;
        VLOG_ERR_RL(&rl,
                    "Cannot lookup subtbl_masks: Broken subtbl_masks list");
        goto err_entry;
    }

    /* Subtable was not found. Create a new one */

    err = get_subtbl_masks_free_slot(netdev_info, &free_slot);
    if (err) {
        goto err_entry;
    }

    miniflow_clone(&entry->mask.masks, &minimatch.mask->masks,
                   miniflow_n_values(&minimatch.mask->masks));
    entry->mf_bits_u0 = count_1bits(minimatch.mask->masks.map.bits[0]);
    entry->mf_bits_u1 = count_1bits(minimatch.mask->masks.map.bits[1]);
    entry->count = 1;
    entry->next = XDP_SUBTABLES_TAIL;
    if (bpf_map_update_elem(masks_fd, &free_slot, entry, 0)) {
        err = errno;
        VLOG_ERR_RL(&rl, "Cannot update subtbl_masks: %s",
                    ovs_strerror(errno));
        goto err_entry;
    }

    if (snprintf(subtbl_name, BPF_OBJ_NAME_LEN, "subtbl_%d_%d",
                 netdev_info->port, free_slot) < 0) {
        err = errno;
        VLOG_ERR_RL(&rl, "snprintf for subtable name failed: %s",
                    ovs_strerror(errno));
        goto err_entry;
    }
    subtbl_fd = bpf_create_map_name(BPF_MAP_TYPE_HASH, subtbl_name,
                                    netdev_info->key_size,
                                    netdev_info->max_actions_len,
                                    netdev_info->max_entries, 0);
    if (subtbl_fd < 0) {
        err = errno;
        VLOG_ERR_RL(&rl, "map creation for subtbl failed: %s",
                    ovs_strerror(errno));
        goto err_entry;
    }

    tmp_values = xzalloc(netdev_info->key_size);
    memcpy(tmp_values, miniflow_get_values(minimatch.flow), key_size);
    if (bpf_map_update_elem(subtbl_fd, tmp_values, xdp_actions, 0)) {
        err = errno;
        VLOG_ERR_RL(&rl, "Cannot insert flow entry: %s", ovs_strerror(errno));
        free(tmp_values);
        goto err_close;
    }
    free(tmp_values);

    if (bpf_map_update_elem(flow_table_fd, &free_slot, &subtbl_fd, 0)) {
        err = errno;
        VLOG_ERR_RL(&rl, "Failed to insert subtbl into flow_table: %s",
                    ovs_strerror(errno));
        goto err_close;
    }

    /* Link the new mask into the list: at the head if the list was empty,
     * otherwise after the last visited entry. */
    if (cnt == 0) {
        err = update_subtbl_masks_hd(head_fd, free_slot);
        if (err) {
            goto err_subtbl;
        }
    } else {
        pentry->next = free_slot;
        /* This effectively only updates one byte of entry->next */
        if (bpf_map_update_elem(masks_fd, &pidx, pentry, 0)) {
            err = errno;
            VLOG_ERR_RL(&rl, "Cannot update subtbl_masks prev entry: %s",
                        ovs_strerror(errno));
            goto err_subtbl;
        }
    }
    delete_subtbl_masks_free_slot(netdev_info, free_slot);
out:
    /* Record the ufid so flow_del can find the mask and key later. */
    hash = hash_bytes(ufid, sizeof *ufid, 0);
    data = xzalloc(sizeof *data);
    data->ufid = *ufid;
    miniflow_clone(&data->mask.masks, &minimatch.mask->masks,
                   miniflow_n_values(&minimatch.mask->masks));
    miniflow_clone(&data->flow, minimatch.flow,
                   miniflow_n_values(minimatch.flow));
    ovs_mutex_lock(&ufid_lock);
    hmap_insert(&ufid_to_xdp, &data->ufid_node, hash);
    ovs_mutex_unlock(&ufid_lock);
err_close:
    /* The success path falls through here too: the local subtbl fd is no
     * longer needed once the flow is installed ('err' is 0 there). */
    close(subtbl_fd);
err_entry:
    free(pentry);
    free(entry);
err_actions:
    free(xdp_actions);
err:
    minimatch_destroy(&minimatch);

    return err;

err_subtbl:
    bpf_map_delete_elem(flow_table_fd, &free_slot);

    goto err_close;
}
> +
/* Flow retrieval (match/actions/stats) is not implemented yet for the
 * XDP offload provider.
 * NOTE(review): this stub reports success (0) without filling 'match',
 * 'actions', 'stats', or 'attrs' -- consider returning EOPNOTSUPP until
 * implemented so callers cannot consume uninitialized results; verify
 * what callers expect. */
static int
netdev_xdp_flow_get(struct netdev *netdev OVS_UNUSED,
                    struct match *match OVS_UNUSED,
                    struct nlattr **actions OVS_UNUSED,
                    const ovs_u128 *ufid OVS_UNUSED,
                    struct dpif_flow_stats *stats OVS_UNUSED,
                    struct dpif_flow_attrs *attrs OVS_UNUSED,
                    struct ofpbuf *buf OVS_UNUSED)
{
    /* TODO: Implement this */
    return 0;
}
> +
/* Removes the flow identified by 'ufid' from the XDP classifier of
 * 'netdev': deletes the key from its subtable hash map, decrements the
 * subtable's flow count, and when the count drops to zero unlinks the
 * mask from the subtable list and releases the array slot.
 * Returns 0 on success or a positive errno value. */
static int
netdev_xdp_flow_del(struct netdev *netdev, const ovs_u128 *ufid,
                    struct dpif_flow_stats *stats OVS_UNUSED)
{
    struct netdev_info *netdev_info;
    struct bpf_object *obj = get_xdp_object(netdev);
    size_t hash;
    struct ufid_to_xdp_data *data;
    int masks_fd, head_fd, flow_table_fd, subtbl_fd, head;
    struct xdp_subtable_mask_header *entry, *pentry;
    int err, cnt, idx, pidx;
    __u32 id;

    netdev_info = get_netdev_info(netdev);
    if (!netdev_info) {
        return ENOENT;
    }

    err = get_subtbl_masks_fd(obj, &masks_fd);
    if (err) {
        return err;
    }

    err = get_subtbl_masks_hd_fd(obj, &head_fd);
    if (err) {
        return err;
    }

    err = get_subtbl_masks_hd(head_fd, &head);
    if (err) {
        return err;
    }

    err = get_flow_table_fd(obj, &flow_table_fd);
    if (err) {
        return err;
    }

    /* Look up the mask and key recorded by flow_put for this ufid. */
    hash = hash_bytes(ufid, sizeof *ufid, 0);
    ovs_mutex_lock(&ufid_lock);
    HMAP_FOR_EACH_WITH_HASH(data, ufid_node, hash, &ufid_to_xdp) {
        if (ovs_u128_equals(*ufid, data->ufid)) {
            break;
        }
    }
    /* NOTE(review): confirm 'data' is NULL when the loop above terminates
     * without a break -- with older HMAP_FOR_EACH macros the iteration
     * variable may hold OBJECT_CONTAINING(NULL, ...), which is non-NULL
     * unless 'ufid_node' is the first member of the struct. */
    if (!data) {
        ovs_mutex_unlock(&ufid_lock);
        VLOG_WARN_RL(&rl, "Cannot find flow key to delete");
        return ENOENT;
    }
    hmap_remove(&ufid_to_xdp, &data->ufid_node);
    ovs_mutex_unlock(&ufid_lock);

    entry = xzalloc(netdev_info->subtable_mask_size);
    pentry = xzalloc(netdev_info->subtable_mask_size);

    /* Iterate subtable mask list implemented using array */
    idx = head;
    for (cnt = 0; cnt < netdev_info->max_subtables; cnt++) {
        if (idx == XDP_SUBTABLES_TAIL) {
            err = ENOENT;
            VLOG_ERR_RL(&rl, "Cannot lookup subtbl_masks: %s",
                        ovs_strerror(err));
            goto out;
        }

        if (bpf_map_lookup_elem(masks_fd, &idx, entry)) {
            err = errno;
            VLOG_ERR_RL(&rl, "Cannot lookup subtbl_masks: %s",
                        ovs_strerror(errno));
            goto out;
        }

        if (minimask_equal(&data->mask, &entry->mask)) {
            break;
        }

        memcpy(pentry, entry, netdev_info->subtable_mask_size);
        pidx = idx;
        idx = entry->next;
    }

    if (cnt == netdev_info->max_subtables) {
        err = ENOENT;
        VLOG_ERR_RL(&rl,
                    "Cannot lookup subtbl_masks: Broken subtbl_masks list");
        goto out;
    }

    if (bpf_map_lookup_elem(flow_table_fd, &idx, &id)) {
        err = errno;
        VLOG_ERR_RL(&rl, "Cannot lookup flow_table: %s",
                    ovs_strerror(errno));
        goto out;
    }

    subtbl_fd = bpf_map_get_fd_by_id(id);
    if (subtbl_fd < 0) {
        err = errno;
        VLOG_ERR_RL(&rl, "Cannot get subtbl fd by id: %s",
                    ovs_strerror(errno));
        goto out;
    }

    bpf_map_delete_elem(subtbl_fd, miniflow_get_values(&data->flow));
    close(subtbl_fd);

    /* Subtable still holds other flows: just write back the count. */
    if (--entry->count > 0) {
        if (bpf_map_update_elem(masks_fd, &idx, entry, 0)) {
            err = errno;
            VLOG_ERR_RL(&rl, "Cannot update subtbl_masks count: %s",
                        ovs_strerror(errno));
        }

        goto out;
    }

    /* NOTE(review): unreachable as written -- if 'count' was 0, the
     * pre-decrement above wraps the uint16_t to 65535, which is > 0, so
     * the function already returned through the branch above.  The
     * underflow check would need to happen before the decrement. */
    if (entry->count == (uint16_t)-1) {
        VLOG_WARN_RL(&rl, "subtbl_masks has negative count: %d",
                     entry->count);
    }

    /* Last flow gone: unlink the mask from the list (head update when it
     * was the first entry, else patch the predecessor's 'next'). */
    if (cnt == 0) {
        err = update_subtbl_masks_hd(head_fd, entry->next);
        if (err) {
            goto out;
        }
    } else {
        pentry->next = entry->next;
        /* This effectively only updates one byte of entry->next */
        if (bpf_map_update_elem(masks_fd, &pidx, pentry, 0)) {
            err = errno;
            VLOG_ERR_RL(&rl, "Cannot update subtbl_masks prev entry: %s",
                        ovs_strerror(errno));
            goto out;
        }
    }

    bpf_map_delete_elem(flow_table_fd, &idx);
    err = add_subtbl_masks_free_slot(netdev_info, idx);
    if (err) {
        VLOG_ERR_RL(&rl, "Cannot add subtbl_masks free slot: %s",
                    ovs_strerror(err));
    }
out:
    free(data);
    free(pentry);
    free(entry);

    return err;
}
> +
> +static int
> +netdev_xdp_init_flow_api(struct netdev *netdev)
> +{
> +    struct bpf_object *obj;
> +    struct btf *btf;
> +    struct netdev_info *netdev_info;
> +    struct bpf_map *flow_table, *subtbl_template, *subtbl_masks;
> +    const struct bpf_map_def *flow_table_def, *subtbl_def, *subtbl_masks_def;
> +    odp_port_t port;
> +    size_t port_hash;
> +    int output_map_fd;
> +    int err, ifindex, devmap_idx;
> +
> +    if (!has_xdp_flowtable(netdev)) {
> +        return EOPNOTSUPP;
> +    }
> +
> +    ifindex = netdev_get_ifindex(netdev);
> +    if (ifindex < 0) {
> +        VLOG_ERR("Failed to get ifindex for %s: %s",
> +                 netdev_get_name(netdev), ovs_strerror(-ifindex));
> +        return -ifindex;
> +    }
> +    port = netdev_ifindex_to_odp_port(ifindex);
> +
> +    netdev_info = find_netdev_info(port);
> +    if (netdev_info) {
> +        VLOG_ERR("xdp offload is already initialized for netdev %s",
> +                 netdev_get_name(netdev));
> +        return EEXIST;
> +    }
> +
> +    obj = get_xdp_object(netdev);
> +    err = get_new_devmap_idx(&devmap_idx);
> +    if (err) {
> +        VLOG_ERR("Failed to get new devmap idx: %s", ovs_strerror(err));
> +        return err;
> +    }
> +
> +    netdev_info = xzalloc(sizeof *netdev_info);
> +    netdev_info->devmap_idx = devmap_idx;
> +
> +    if (get_odp_port(netdev, &netdev_info->port) || !netdev_info->port) {
> +        VLOG_ERR("Failed to get odp_port for %s", netdev_get_name(netdev));
> +        err = ENOENT;
> +        goto err;
> +    }
> +
> +    btf = bpf_object__btf(obj);
> +    if (!btf) {
> +        VLOG_ERR("BUG: BPF object for netdev \"%s\" does not contain BTF",
> +                 netdev_get_name(netdev));
> +        err = EINVAL;
> +        goto err;
> +    }
> +
> +    err = probe_meta_info(netdev_info, btf);
> +    if (err) {
> +        VLOG_ERR("Failed to initialize xdp offload metadata for %s",
> +                 netdev_get_name(netdev));
> +        goto err;
> +    }
> +
> +    flow_table = bpf_object__find_map_by_name(obj, "flow_table");
> +    if (!flow_table) {
> +        VLOG_ERR("BUG: BPF map \"flow_table\" not found");
> +        err = ENOENT;
> +        goto err;
> +    }
> +    flow_table_def = bpf_map__def(flow_table);
> +    if (flow_table_def->max_entries > XDP_MAX_SUBTABLES) {
> +        VLOG_ERR("flow_table max_entries must not be greater than %d",
> +                 XDP_MAX_SUBTABLES);
> +        goto err;
> +    }
> +    netdev_info->max_subtables = flow_table_def->max_entries;
> +
> +    subtbl_template = bpf_object__find_map_by_name(obj, "subtbl_template");
> +    if (!subtbl_template) {
> +        VLOG_ERR("BUG: BPF map \"subtbl_template\" not found");
> +        err = ENOENT;
> +        goto err;
> +    }
> +    subtbl_def = bpf_map__def(subtbl_template);
> +    netdev_info->key_size = subtbl_def->key_size;
> +    netdev_info->max_actions_len = subtbl_def->value_size;
> +    netdev_info->max_entries = subtbl_def->max_entries;
> +
> +    subtbl_masks = bpf_object__find_map_by_name(obj, "subtbl_masks");
> +    if (!subtbl_masks) {
> +        VLOG_ERR("BPF map \"subtbl_masks\" not found");
> +        err = ENOENT;
> +        goto err;
> +    }
> +    subtbl_masks_def = bpf_map__def(subtbl_masks);
> +    if (subtbl_masks_def->max_entries != netdev_info->max_subtables) {
> +        VLOG_ERR("\"subtbl_masks\" map has different max_entries from 
> \"flow_table\"");
> +        goto err;
> +    }
> +    netdev_info->subtable_mask_size = subtbl_masks_def->value_size;
> +    init_subtbl_masks_free_slot(netdev_info);
> +
> +    err = get_output_map_fd(obj, &output_map_fd);
> +    if (err) {
> +        goto err;
> +    }
> +    if (bpf_map_update_elem(output_map_fd, &devmap_idx, &ifindex, 0)) {
> +        err = errno;
> +        VLOG_ERR("Failed to insert idx %d if %s into output_map: %s",
> +                 devmap_idx, netdev_get_name(netdev), ovs_strerror(errno));
> +        goto err;
> +    }
> +
> +    port_hash = hash_bytes(&port, sizeof port, 0);
> +    hmap_insert(&netdev_info_table, &netdev_info->port_node, port_hash);
> +
> +    return 0;
> +err:
> +    free(netdev_info);
> +    delete_devmap_idx(devmap_idx);
> +    return err;
> +}
> +
> +static void
> +netdev_xdp_uninit_flow_api(struct netdev *netdev)
> +{
> +    struct bpf_object *obj;
> +    struct netdev_info *netdev_info;
> +    int output_map_fd, devmap_idx;
> +
> +    netdev_info = get_netdev_info(netdev);
> +    if (!netdev_info) {
> +        VLOG_WARN("%s: netdev_info not found on uninitializing xdp flow api",
> +                  netdev_get_name(netdev));
> +        return;
> +    }
> +    hmap_remove(&netdev_info_table, &netdev_info->port_node);
> +
> +    devmap_idx = netdev_info->devmap_idx;
> +    obj = get_xdp_object(netdev);
> +    if (!get_output_map_fd(obj, &output_map_fd)) {
> +        bpf_map_delete_elem(output_map_fd, &devmap_idx);
> +    } else {
> +        VLOG_WARN("%s: Failed to get output_map fd on uninitializing xdp 
> flow api",
> +                  netdev_get_name(netdev));
> +    }
> +
> +    free(netdev_info);
> +    delete_devmap_idx(devmap_idx);
> +}
> +
/* Flow offload provider backed by a custom XDP program loaded on an
 * afxdp netdev, registered under the "linux_xdp" type. */
const struct netdev_flow_api netdev_offload_xdp = {
    .type = "linux_xdp",
    .flow_put = netdev_xdp_flow_put,
    .flow_get = netdev_xdp_flow_get,
    .flow_del = netdev_xdp_flow_del,
    .init_flow_api = netdev_xdp_init_flow_api,
    .uninit_flow_api = netdev_xdp_uninit_flow_api,
};
> diff --git a/lib/netdev-offload-xdp.h b/lib/netdev-offload-xdp.h
> new file mode 100644
> index 000000000..800a0c0e5
> --- /dev/null
> +++ b/lib/netdev-offload-xdp.h
> @@ -0,0 +1,49 @@
> +/*
> + * Copyright (c) 2020 NTT Corp.
> + *
> + * Licensed under the Apache License, Version 2.0 (the "License");
> + * you may not use this file except in compliance with the License.
> + * You may obtain a copy of the License at:
> + *
> + *     http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +
#ifndef NETDEV_OFFLOAD_XDP_H
#define NETDEV_OFFLOAD_XDP_H 1

#include "flow.h"

/* Maximum number of devmap indices (odp ports) the provider allocates. */
#define XDP_MAX_PORTS 65536
/* XDP_MAX_SUBTABLES must be <= 255 to fit in 1 byte with 1 value reserved
 * for TAIL */
#define XDP_MAX_SUBTABLES 128
/* Sentinel 'next' value terminating the subtable mask list. */
#define XDP_SUBTABLES_TAIL XDP_MAX_SUBTABLES

/* Fixed header of each "subtbl_masks" map value; followed in the map
 * value by the miniflow value buffer of 'mask'. */
struct xdp_subtable_mask_header {
    uint16_t count;        /* Number of flows installed in this subtable. */
    uint8_t mf_bits_u0;    /* count_1bits of mask flowmap bits[0]. */
    uint8_t mf_bits_u1;    /* count_1bits of mask flowmap bits[1]. */
    int next;              /* Index of the next entry in the list, or
                            * XDP_SUBTABLES_TAIL at the end. */
    struct minimask mask;  /* Subtable mask. */
};

/* Value layout of subtable hash maps: this header followed by the
 * netlink-encoded actions of 'actions_len' bytes. */
struct xdp_flow_actions_header {
    size_t actions_len;
    /* Followed by netlink attributes (actions) */
};

struct nlattr;

/* Returns a pointer to the actions stored immediately after 'header'. */
static inline struct nlattr *
xdp_flow_actions(struct xdp_flow_actions_header *header)
{
    return (struct nlattr *)(header + 1);
}

#endif /* netdev-offload-xdp.h */
> diff --git a/lib/netdev-offload.c b/lib/netdev-offload.c
> index 669513646..f14f77420 100644
> --- a/lib/netdev-offload.c
> +++ b/lib/netdev-offload.c
> @@ -185,6 +185,9 @@ netdev_assign_flow_api(struct netdev *netdev)
>              if (!current_rfa ||
>                  (!netdev_flow_api_driver &&
>                   !strcmp(FLOW_API_DRIVER_DEFAULT, rfa->flow_api->type))) {
> +                if (current_rfa && current_rfa->flow_api->uninit_flow_api) {
> +                    current_rfa->flow_api->uninit_flow_api(netdev);
> +                }
>                  current_rfa = rfa;
>              }
>          } else {
> @@ -326,6 +329,9 @@ netdev_uninit_flow_api(struct netdev *netdev)
>
>      ovsrcu_set(&netdev->flow_api, NULL);
>      rfa = netdev_lookup_flow_api(flow_api->type);
> +    if (rfa->flow_api->uninit_flow_api) {
> +        rfa->flow_api->uninit_flow_api(netdev);
> +    }
>      ovs_refcount_unref(&rfa->refcnt);
>  }
>
> --
> 2.25.1
>
_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to