Thanks for the patch. My comments below: On Mon, Jun 29, 2020 at 8:30 AM Toshiaki Makita <toshiaki.maki...@gmail.com> wrote: > > This provider offloads classifier to software XDP. > > It works only when a custom XDP object is loaded by afxdp netdev. > The BPF program needs to implement classifier with array-of-maps for > subtable hashmaps and arraymap for subtable masks. The flow api > provider detects classifier support in the custom XDP program when > loading it. > > The requirements for the BPF program is as below: > > - A struct named "meta_info" in ".ovs_meta" section > inform vswitchd of supported keys, actions, and the max number of > actions. > > - A map named "subtbl_masks" > An array which implements a list. The entry contains struct > xdp_subtables_mask provided by lib/netdev-offload-xdp.h followed by > miniflow buf of any length for the mask. > > - A map named "subtbl_masks_hd" > A one entry int array which contains the first entry index of the list > implemented by subtbl_masks. > > - A map named "flow_table" > An array-of-maps. Each entry points to subtable hash-map instantiated > with the following "subtbl_template". > The entry of each index of subtbl_masks has miniflow mask of subtable > in the corresponding index of flow_table. > > - A map named "subtbl_template" > A template for subtable, used for entries of "flow_table". > The key must be miniflow buf (without leading map). > > - A map named "output_map" > A devmap. Each entry represents odp port. > > - A map named "xsks_map" > A xskmap. Used for upcall.
Maybe later on, we can add the above to Documentation/ and explain the purpose of these maps there. > > For more details, refer to the reference BPF program in next commit. > > In the future it may be possible to offload classifier to SmartNIC > through XDP, but currently it's not because map-in-map is not supported > by XDP hw-offload. I think it will be hard, given that only Netronome supports XDP offload today. > > Signed-off-by: Toshiaki Makita <toshiaki.maki...@gmail.com> > --- > lib/automake.mk | 6 +- > lib/bpf-util.c | 38 + > lib/bpf-util.h | 22 + > lib/netdev-afxdp.c | 205 ++++- > lib/netdev-afxdp.h | 3 + > lib/netdev-linux-private.h | 2 + > lib/netdev-offload-provider.h | 6 + > lib/netdev-offload-xdp.c | 1315 +++++++++++++++++++++++++++++++++ > lib/netdev-offload-xdp.h | 49 ++ > lib/netdev-offload.c | 6 + > 10 files changed, 1650 insertions(+), 2 deletions(-) > create mode 100644 lib/bpf-util.c > create mode 100644 lib/bpf-util.h > create mode 100644 lib/netdev-offload-xdp.c > create mode 100644 lib/netdev-offload-xdp.h > > diff --git a/lib/automake.mk b/lib/automake.mk > index 86940ccd2..1fa1371f3 100644 > --- a/lib/automake.mk > +++ b/lib/automake.mk > @@ -421,10 +421,14 @@ endif > > if HAVE_AF_XDP > lib_libopenvswitch_la_SOURCES += \ > + lib/bpf-util.c \ > + lib/bpf-util.h \ > lib/netdev-afxdp-pool.c \ > lib/netdev-afxdp-pool.h \ > lib/netdev-afxdp.c \ > - lib/netdev-afxdp.h > + lib/netdev-afxdp.h \ > + lib/netdev-offload-xdp.c \ > + lib/netdev-offload-xdp.h > endif How about doing s.t like: --enable-afxdp: the current one on master without the xdp offload program --enable-afxdp-with-bpf: the afxdp one plus your xdp offload program So that when users only --enable-afxdp, it doesn't need to compile in the lib/netdev-offload-xdp.* and bpf/* > > if DPDK_NETDEV > diff --git a/lib/bpf-util.c b/lib/bpf-util.c > new file mode 100644 > index 000000000..324cfbe1d > --- /dev/null > +++ b/lib/bpf-util.c > @@ -0,0 +1,38 @@ > +/* > + * Copyright (c) 2020 NTT Corp. 
> + * > + * Licensed under the Apache License, Version 2.0 (the "License"); > + * you may not use this file except in compliance with the License. > + * You may obtain a copy of the License at: > + * > + * http://www.apache.org/licenses/LICENSE-2.0 > + * > + * Unless required by applicable law or agreed to in writing, software > + * distributed under the License is distributed on an "AS IS" BASIS, > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. > + * See the License for the specific language governing permissions and > + * limitations under the License. > + */ > + > +#include <config.h> > + > +#include "bpf-util.h" > + > +#include <bpf/libbpf.h> > + > +#include "ovs-thread.h" > + > +DEFINE_STATIC_PER_THREAD_DATA(struct { char s[128]; }, > + libbpf_strerror_buffer, > + { "" }); > + > +const char * > +ovs_libbpf_strerror(int err) > +{ > + enum { BUFSIZE = sizeof libbpf_strerror_buffer_get()->s }; just curious: what's the benefit of using enum {BUFSIZE ...} here? > + char *buf = libbpf_strerror_buffer_get()->s; > + > + libbpf_strerror(err, buf, BUFSIZE); > + > + return buf; > +} > diff --git a/lib/bpf-util.h b/lib/bpf-util.h > new file mode 100644 > index 000000000..6346935b3 > --- /dev/null > +++ b/lib/bpf-util.h > @@ -0,0 +1,22 @@ > +/* > + * Copyright (c) 2020 NTT Corp. > + * > + * Licensed under the Apache License, Version 2.0 (the "License"); > + * you may not use this file except in compliance with the License. > + * You may obtain a copy of the License at: > + * > + * http://www.apache.org/licenses/LICENSE-2.0 > + * > + * Unless required by applicable law or agreed to in writing, software > + * distributed under the License is distributed on an "AS IS" BASIS, > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. > + * See the License for the specific language governing permissions and > + * limitations under the License. 
> + */ > + > +#ifndef BPF_UTIL_H > +#define BPF_UTIL_H 1 > + > +const char *ovs_libbpf_strerror(int err); > + > +#endif /* bpf-util.h */ > diff --git a/lib/netdev-afxdp.c b/lib/netdev-afxdp.c > index 2b2b811e2..9229ae1b0 100644 > --- a/lib/netdev-afxdp.c > +++ b/lib/netdev-afxdp.c > @@ -22,6 +22,7 @@ > #include "netdev-afxdp-pool.h" > > #include <bpf/bpf.h> > +#include <bpf/btf.h> > #include <errno.h> > #include <inttypes.h> > #include <linux/rtnetlink.h> > @@ -37,10 +38,13 @@ > #include <sys/types.h> > #include <unistd.h> > > +#include "bpf-util.h" > #include "coverage.h" > #include "dp-packet.h" > #include "dpif-netdev.h" > #include "fatal-signal.h" > +#include "netdev-offload-provider.h" > +#include "netdev-offload-xdp.h" > #include "openvswitch/compiler.h" > #include "openvswitch/dynamic-string.h" > #include "openvswitch/list.h" > @@ -261,10 +265,203 @@ netdev_afxdp_sweep_unused_pools(void *aux OVS_UNUSED) > ovs_mutex_unlock(&unused_pools_mutex); > } > > +bool > +has_xdp_flowtable(struct netdev *netdev) > +{ > + struct netdev_linux *dev = netdev_linux_cast(netdev); > + > + return dev->has_xdp_flowtable; > +} > + > +struct bpf_object * > +get_xdp_object(struct netdev *netdev) > +{ > + struct netdev_linux *dev = netdev_linux_cast(netdev); > + > + return dev->xdp_obj; > +} > + > +static int > +xdp_preload(struct netdev *netdev, struct bpf_object *obj) > +{ > + static struct ovsthread_once output_map_once = > OVSTHREAD_ONCE_INITIALIZER; > + struct btf *btf; > + struct bpf_map *flow_table, *subtbl_template, *subtbl_masks_hd, > *xsks_map, > + *output_map; > + const struct bpf_map_def *flow_table_def, *subtbl_def; > + int flow_table_fd, subtbl_meta_fd, xsks_map_fd; > + static int output_map_fd = -1; > + int n_rxq, err; > + > + btf = bpf_object__btf(obj); > + if (!btf) { > + VLOG_DBG("BPF object for netdev \"%s\" does not contain BTF", > + netdev_get_name(netdev)); > + return EOPNOTSUPP; > + } > + if (btf__find_by_name_kind(btf, ".ovs_meta", BTF_KIND_DATASEC) < 0) { > 
+ VLOG_DBG("\".ovs_meta\" datasec not found in BTF"); > + return EOPNOTSUPP; > + } > + > + flow_table = bpf_object__find_map_by_name(obj, "flow_table"); > + if (!flow_table) { > + VLOG_DBG("%s: \"flow_table\" map does not exist", > + netdev_get_name(netdev)); > + return EOPNOTSUPP; > + } > + > + subtbl_template = bpf_object__find_map_by_name(obj, "subtbl_template"); > + if (!subtbl_template) { > + VLOG_DBG("%s: \"subtbl_template\" map does not exist", > + netdev_get_name(netdev)); > + return EOPNOTSUPP; > + } > + > + output_map = bpf_object__find_map_by_name(obj, "output_map"); > + if (!output_map) { > + VLOG_DBG("%s: \"output_map\" map does not exist", > + netdev_get_name(netdev)); > + return EOPNOTSUPP; > + } > + > + if (ovsthread_once_start(&output_map_once)) { > + /* XXX: close output_map_fd somewhere? */ maybe at uninit_flow_api, we have to check whether there is still a netdev using linux_xdp offload. And if not, close this map? > + output_map_fd = bpf_create_map_name(BPF_MAP_TYPE_DEVMAP, > "output_map", > + sizeof(int), sizeof(int), > + XDP_MAX_PORTS, 0); > + if (output_map_fd < 0) { > + err = errno; > + VLOG_WARN("%s: Map creation for output_map failed: %s", > + netdev_get_name(netdev), ovs_strerror(errno)); > + ovsthread_once_done(&output_map_once); > + return err; > + } > + ovsthread_once_done(&output_map_once); > + } > + > + flow_table_def = bpf_map__def(flow_table); > + if (flow_table_def->type != BPF_MAP_TYPE_ARRAY_OF_MAPS) { > + VLOG_WARN("%s: \"flow_table\" map type is not map-in-map array.", > + netdev_get_name(netdev)); > + return EINVAL; > + } > + > + subtbl_def = bpf_map__def(subtbl_template); > + if (subtbl_def->type != BPF_MAP_TYPE_HASH) { > + VLOG_WARN("%s: \"subtbl_templates\" map type is not hash-table", > + netdev_get_name(netdev)); > + return EINVAL; > + } > + > + subtbl_meta_fd = bpf_create_map(BPF_MAP_TYPE_HASH, subtbl_def->key_size, > + subtbl_def->value_size, > + subtbl_def->max_entries, 0); > + if (subtbl_meta_fd < 0) { > + err = 
errno; > + VLOG_WARN("%s: Map creation for flow_table meta table failed: %s", > + netdev_get_name(netdev), ovs_strerror(errno)); > + return err; > + } > + flow_table_fd = bpf_create_map_in_map(BPF_MAP_TYPE_ARRAY_OF_MAPS, > + "flow_table", sizeof(uint32_t), > + subtbl_meta_fd, > + flow_table_def->max_entries, > + 0); > + if (flow_table_fd < 0) { > + err = errno; > + VLOG_WARN("%s: Map creation for flow_table failed: %s", > + netdev_get_name(netdev), ovs_strerror(errno)); > + close(subtbl_meta_fd); > + return err; > + } > + close(subtbl_meta_fd); > + > + err = bpf_map__reuse_fd(flow_table, flow_table_fd); > + if (err) { > + VLOG_WARN("%s: Failed to reuse flow_table fd: %s", > + netdev_get_name(netdev), ovs_libbpf_strerror(err)); > + close(flow_table_fd); > + return EINVAL; > + } > + close(flow_table_fd); > + > + subtbl_masks_hd = bpf_object__find_map_by_name(obj, "subtbl_masks_hd"); > + if (!subtbl_masks_hd) { > + /* Head index can be a global variable. In that case libbpf will > + * initialize it. 
*/ > + VLOG_DBG("%s: \"subtbl_masks_hd\" map does not exist", > + netdev_get_name(netdev)); > + } else { > + int head_fd, head = XDP_SUBTABLES_TAIL, zero = 0; > + > + head_fd = bpf_create_map_name(BPF_MAP_TYPE_ARRAY, "subtbl_masks_hd", > + sizeof(uint32_t), sizeof(int), 1, 0); > + if (head_fd < 0) { > + err = errno; > + VLOG_ERR("%s: Map creation of \"subtbl_masks_hd\" failed: %s", > + netdev_get_name(netdev), ovs_strerror(errno)); > + return err; > + } > + > + if (bpf_map_update_elem(head_fd, &zero, &head, 0)) { > + err = errno; > + VLOG_ERR("Cannot update subtbl_masks_hd: %s", > + ovs_strerror(errno)); > + return err; > + } > + > + err = bpf_map__reuse_fd(subtbl_masks_hd, head_fd); > + if (err) { > + VLOG_ERR("%s: Failed to reuse subtbl_masks_hd fd: %s", > + netdev_get_name(netdev), ovs_libbpf_strerror(err)); > + close(head_fd); > + return EINVAL; > + } > + close(head_fd); > + } > + > + xsks_map = bpf_object__find_map_by_name(obj, "xsks_map"); > + if (!xsks_map) { > + VLOG_ERR("%s: BUG: \"xsks_map\" map does not exist", > + netdev_get_name(netdev)); > + return EOPNOTSUPP; > + } > + > + n_rxq = netdev_n_rxq(netdev); > + xsks_map_fd = bpf_create_map_name(BPF_MAP_TYPE_XSKMAP, "xsks_map", > + sizeof(int), sizeof(int), n_rxq, 0); > + if (xsks_map_fd < 0) { > + err = errno; > + VLOG_WARN("%s: Map creation for xsks_map failed: %s", > + netdev_get_name(netdev), ovs_strerror(errno)); > + return err; > + } > + > + err = bpf_map__reuse_fd(xsks_map, xsks_map_fd); > + if (err) { > + VLOG_WARN("%s: Failed to reuse xsks_map fd: %s", > + netdev_get_name(netdev), ovs_libbpf_strerror(err)); > + close(xsks_map_fd); > + return EINVAL; > + } > + close(xsks_map_fd); > + > + err = bpf_map__reuse_fd(output_map, output_map_fd); > + if (err) { > + VLOG_WARN("%s: Failed to reuse output_map fd: %s", > + netdev_get_name(netdev), ovs_libbpf_strerror(err)); > + return EINVAL; > + } > + > + return 0; > +} > + > static int > xsk_load_prog(struct netdev *netdev, const char *path, struct 
bpf_object > **pobj, > int *prog_fd) > { > + struct netdev_linux *dev = netdev_linux_cast(netdev); > struct bpf_object_open_attr attr = { > .prog_type = BPF_PROG_TYPE_XDP, > .file = path, > @@ -298,6 +495,12 @@ xsk_load_prog(struct netdev *netdev, const char *path, > struct bpf_object **pobj, > goto err; > } > > + if (!xdp_preload(netdev, obj)) { > + VLOG_INFO("%s: Detected flowtable support in XDP program", > + netdev_get_name(netdev)); > + dev->has_xdp_flowtable = true; > + } > + > if (bpf_object__load(obj)) { > VLOG_ERR("%s: Can't load XDP program at '%s'", > netdev_get_name(netdev), path); > @@ -1297,7 +1500,7 @@ libbpf_print(enum libbpf_print_level level, > int netdev_afxdp_init(void) > { > libbpf_set_print(libbpf_print); > - return 0; > + return netdev_register_flow_api_provider(&netdev_offload_xdp); maybe use the ovsthread_once_start(&once) to make sure it is registered only once. > } > > int > diff --git a/lib/netdev-afxdp.h b/lib/netdev-afxdp.h > index e91cd102d..324152e8f 100644 > --- a/lib/netdev-afxdp.h > +++ b/lib/netdev-afxdp.h > @@ -44,6 +44,7 @@ struct netdev_stats; > struct smap; > struct xdp_umem; > struct xsk_socket_info; > +struct bpf_object; > > int netdev_afxdp_rxq_construct(struct netdev_rxq *rxq_); > void netdev_afxdp_rxq_destruct(struct netdev_rxq *rxq_); > @@ -70,6 +71,8 @@ int netdev_afxdp_get_custom_stats(const struct netdev > *netdev, > void free_afxdp_buf(struct dp_packet *p); > int netdev_afxdp_reconfigure(struct netdev *netdev); > void signal_remove_xdp(struct netdev *netdev); > +bool has_xdp_flowtable(struct netdev *netdev); > +struct bpf_object *get_xdp_object(struct netdev *netdev); > > #else /* !HAVE_AF_XDP */ > > diff --git a/lib/netdev-linux-private.h b/lib/netdev-linux-private.h > index 0f8823c45..876510cb7 100644 > --- a/lib/netdev-linux-private.h > +++ b/lib/netdev-linux-private.h > @@ -121,6 +121,8 @@ struct netdev_linux { > const char *xdp_obj_path; /* XDP object file path. 
*/ > const char *requested_xdp_obj; > struct bpf_object *xdp_obj; > + > + bool has_xdp_flowtable; > #endif > }; > > diff --git a/lib/netdev-offload-provider.h b/lib/netdev-offload-provider.h > index 0bed7bf61..7147a5166 100644 > --- a/lib/netdev-offload-provider.h > +++ b/lib/netdev-offload-provider.h > @@ -86,6 +86,9 @@ struct netdev_flow_api { > /* Initializies the netdev flow api. typo: initializes > * Return 0 if successful, otherwise returns a positive errno value. */ > int (*init_flow_api)(struct netdev *); > + > + /* Uninitialize the netdev flow api. */ > + void (*uninit_flow_api)(struct netdev *); > }; > > int netdev_register_flow_api_provider(const struct netdev_flow_api *); > @@ -94,6 +97,9 @@ bool netdev_flow_api_equals(const struct netdev *, const > struct netdev *); > > #ifdef __linux__ > extern const struct netdev_flow_api netdev_offload_tc; > +#ifdef HAVE_AF_XDP > +extern const struct netdev_flow_api netdev_offload_xdp; > +#endif > #endif > > #ifdef DPDK_NETDEV > diff --git a/lib/netdev-offload-xdp.c b/lib/netdev-offload-xdp.c > new file mode 100644 > index 000000000..53eb24557 > --- /dev/null > +++ b/lib/netdev-offload-xdp.c > @@ -0,0 +1,1315 @@ > +/* > + * Copyright (c) 2020 NTT Corp. > + * > + * Licensed under the Apache License, Version 2.0 (the "License"); > + * you may not use this file except in compliance with the License. > + * You may obtain a copy of the License at: > + * > + * http://www.apache.org/licenses/LICENSE-2.0 > + * > + * Unless required by applicable law or agreed to in writing, software > + * distributed under the License is distributed on an "AS IS" BASIS, > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. > + * See the License for the specific language governing permissions and > + * limitations under the License. 
> + */ > + > +#include <config.h> > + > +#include "netdev-offload-xdp.h" > + > +#include <bpf/bpf.h> > +#include <bpf/btf.h> > +#include <bpf/libbpf.h> > +#include <errno.h> > +#include <unistd.h> > + > +#include "bpf-util.h" > +#include "dpif.h" > +#include "hash.h" > +#include "netdev.h" > +#include "netdev-offload-provider.h" > +#include "netlink.h" > +#include "odp-netlink.h" > +#include "openvswitch/hmap.h" > +#include "openvswitch/match.h" > +#include "openvswitch/vlog.h" > + > +VLOG_DEFINE_THIS_MODULE(netdev_offload_xdp); > +static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(100, 5); > + > +static struct hmap ufid_to_xdp = HMAP_INITIALIZER(&ufid_to_xdp); > + > +static struct ovs_mutex ufid_lock = OVS_MUTEX_INITIALIZER; > + > +struct ufid_to_xdp_data { > + struct hmap_node ufid_node; > + ovs_u128 ufid; > + struct minimask mask; > + uint64_t mask_buf[FLOW_MAX_PACKET_U64S]; > + struct miniflow flow; > + uint64_t flow_buf[FLOW_MAX_PACKET_U64S]; > +}; > + > +static struct hmap netdev_info_table = HMAP_INITIALIZER(&netdev_info_table); > + > +struct netdev_info { the "netdev_info" makes me think this is something generic to netdev. But this is actually XDP-specific. How about changing the name to netdev_xdp_info? 
+ struct hmap_node port_node; > + struct netdev *netdev; > + odp_port_t port; > + uint32_t devmap_idx; > + struct miniflow supported_keys; > + uint64_t supported_keys_buf[FLOW_MAX_PACKET_U64S]; > + uint32_t supported_actions; > + uint32_t max_subtables; > + uint32_t subtable_mask_size; > + uint32_t key_size; > + uint32_t max_actions; > + uint32_t max_actions_len; > + uint32_t max_entries; > + int free_slot_top; > + int free_slots[XDP_MAX_SUBTABLES]; > +}; > + > +static struct hmap devmap_idx_table = HMAP_INITIALIZER(&devmap_idx_table); > + > +struct devmap_idx_data { > + struct hmap_node node; > + int devmap_idx; > +}; > + > + > +/* Free entry managemant for list implementation using array */ typo: management I see you're maintaining a resource pool containing free slots for subtables. I'm wondering if lib/id-pool.c can be used here? > + > +static void > +init_subtbl_masks_free_slot(struct netdev_info *netdev_info) > +{ > + int i; > + int max_subtables = netdev_info->max_subtables; > + > + for (i = 0; i < max_subtables; i++) { > + netdev_info->free_slots[max_subtables - 1 - i] = i; > + } > + netdev_info->free_slot_top = max_subtables - 1; > +} > + > +static int > +get_subtbl_masks_free_slot(const struct netdev_info *netdev_info, int *slot) > +{ > + if (netdev_info->free_slot_top < 0) { > + return ENOBUFS; > + } > + > + *slot = netdev_info->free_slots[netdev_info->free_slot_top]; > + return 0; > +} > + > +static int > +add_subtbl_masks_free_slot(struct netdev_info *netdev_info, int slot) > +{ > + if (netdev_info->free_slot_top >= netdev_info->max_subtables - 1) { > + VLOG_ERR_RL(&rl, "BUG: free_slot overflow: top=%d, slot=%d", > + netdev_info->free_slot_top, slot); > + return EOVERFLOW; > + } > + > + netdev_info->free_slots[++netdev_info->free_slot_top] = slot; > + return 0; > +} > + > +static void > +delete_subtbl_masks_free_slot(struct netdev_info *netdev_info, int slot) > +{ > + int top_slot; > + > + if (netdev_info->free_slot_top < 0) { > + VLOG_ERR_RL(&rl, 
"BUG: free_slot underflow: top=%d, slot=%d", > + netdev_info->free_slot_top, slot); > + return; > + } > + > + top_slot = netdev_info->free_slots[netdev_info->free_slot_top]; > + if (top_slot != slot) { > + VLOG_ERR_RL(&rl, > + "BUG: inconsistent free_slot top: top_slot=%d, slot=%d", > + top_slot, slot); > + return; > + } > + > + netdev_info->free_slot_top--; > +} > + > + > +#define FLOW_MASK_FIELD(MASK, FIELD) \ > + memset(&(MASK).FIELD, 0xff, sizeof (MASK).FIELD) > + > +static int > +probe_supported_keys(struct netdev_info *netdev_info, struct btf *btf, > + uint32_t type) > +{ > + struct miniflow *mf = &netdev_info->supported_keys; > + struct flowmap *map = &mf->map; > + struct flow mask; > + struct btf_member *m; > + const struct btf_type *pt, *t; > + int i; > + > + pt = btf__type_by_id(btf, type); > + if (!pt) { > + VLOG_ERR("\"supported_keys\" field type is unknown"); > + return EINVAL; > + } > + if (!btf_is_ptr(pt)) { > + VLOG_ERR("\"supported_keys\" field is not ptr"); > + return EINVAL; > + } > + t = btf__type_by_id(btf, pt->type); > + if (!t) { > + VLOG_ERR("\"supported_keys\" ptr type is unknown"); > + return EINVAL; > + } > + if (!btf_is_struct(t)) { > + VLOG_ERR("\"supported_keys\" field is not struct ptr"); > + return EINVAL; > + } > + > + memset(&mask, 0, sizeof mask); > + flowmap_init(map); > + for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) { > + const char *name = btf__name_by_offset(btf, m->name_off); > + > + if (!name) { > + VLOG_ERR("Unnamed field #%d in \"supported_keys\" struct", i); > + return EINVAL; > + } > + if (!strcmp(name, "dl_dst")) { > + FLOWMAP_SET(map, dl_dst); > + FLOW_MASK_FIELD(mask, dl_dst); > + } else if (!strcmp(name, "dl_src")) { > + FLOWMAP_SET(map, dl_src); > + FLOW_MASK_FIELD(mask, dl_src); > + } else if (!strcmp(name, "dl_type")) { > + FLOWMAP_SET(map, dl_type); > + FLOW_MASK_FIELD(mask, dl_type); > + } else if (!strcmp(name, "vlans")) { > + const struct btf_type *vt; > + const struct btf_array *arr; > + > + 
FLOWMAP_SET(map, vlans); > + vt = btf__type_by_id(btf, m->type); > + if (!vt) { > + VLOG_ERR("\"vlans\" field type is unknown"); > + return EINVAL; > + } > + if (!btf_is_array(vt)) { > + VLOG_ERR("\"vlans\" field is not array"); > + return EINVAL; > + } > + arr = btf_array(vt); > + if (arr->nelems > 2) { > + VLOG_ERR("\"vlans\" elems too many: %u", arr->nelems); > + return EINVAL; > + } > + memset(&mask.vlans, 0xff, sizeof mask.vlans[0] * arr->nelems); > + } else if (!strcmp(name, "nw_src")) { > + FLOWMAP_SET(map, nw_src); > + FLOW_MASK_FIELD(mask, nw_src); > + } else if (!strcmp(name, "nw_dst")) { > + FLOWMAP_SET(map, nw_dst); > + FLOW_MASK_FIELD(mask, nw_dst); > + } else if (!strcmp(name, "nw_frag")) { > + FLOWMAP_SET(map, nw_frag); > + FLOW_MASK_FIELD(mask, nw_frag); > + } else if (!strcmp(name, "nw_tos")) { > + FLOWMAP_SET(map, nw_tos); > + FLOW_MASK_FIELD(mask, nw_tos); > + } else if (!strcmp(name, "nw_ttl")) { > + FLOWMAP_SET(map, nw_ttl); > + FLOW_MASK_FIELD(mask, nw_ttl); > + } else if (!strcmp(name, "nw_proto")) { > + FLOWMAP_SET(map, nw_proto); > + FLOW_MASK_FIELD(mask, nw_proto); > + } else if (!strcmp(name, "tp_src")) { > + FLOWMAP_SET(map, tp_src); > + FLOW_MASK_FIELD(mask, tp_src); > + } else if (!strcmp(name, "tp_dst")) { > + FLOWMAP_SET(map, tp_dst); > + FLOW_MASK_FIELD(mask, tp_dst); > + } else if (strncmp(name, "pad", 3)) { > + VLOG_ERR("Unsupported flow key %s", name); > + return EOPNOTSUPP; > + } > + } > + > + miniflow_init(mf, &mask); > + > + return 0; > +} > + > +static bool > +is_supported_keys(struct netdev_info *netdev_info, const struct minimask > *mask) > +{ > + const struct miniflow *mf = &mask->masks; > + const uint64_t *p = miniflow_get_values(mf); > + size_t idx; > + > + FLOWMAP_FOR_EACH_INDEX(idx, mf->map) { > + uint64_t supported = miniflow_get(&netdev_info->supported_keys, idx); > + if (~supported & *p) { > + VLOG_DBG("Unsupported key: Index=%lu, Supported=%lx, Mask=%lx", > + idx, supported, *p); > + return false; > + } > + p++; > 
+ } > + return true; > +} > + > +static int > +probe_supported_actions(struct netdev_info *netdev_info, struct btf *btf, > + uint32_t type) > +{ > + const struct btf_type *pt, *t; > + const struct btf_enum *v; > + int i; > + > + pt = btf__type_by_id(btf, type); > + if (!pt) { > + VLOG_ERR("\"supported_actions\" field type is unknown"); > + return EINVAL; > + } > + if (!btf_is_ptr(pt)) { > + VLOG_ERR("\"supported_actions\" field is not ptr"); > + return EINVAL; > + } > + t = btf__type_by_id(btf, pt->type); > + if (!t) { > + VLOG_ERR("\"supported_actions\" ptr type is unknown"); > + return EINVAL; > + } > + if (!btf_is_enum(t)) { > + VLOG_ERR("\"supported_actions\" field is not enum ptr"); > + return EINVAL; > + } > + > + netdev_info->supported_actions = 0; > + v = btf_enum(t); > + for (i = 0; i < btf_vlen(t); i++) { > + const char *name = btf__name_by_offset(btf, v[i].name_off); > + > + if (!name) { > + VLOG_ERR("Unnamed field #%d in \"supported_actions\" enum", i); > + return EINVAL; > + } > + switch (v[i].val) { > + case OVS_ACTION_ATTR_OUTPUT: > + case OVS_ACTION_ATTR_PUSH_VLAN: > + case OVS_ACTION_ATTR_POP_VLAN: > + netdev_info->supported_actions |= (1 << v[i].val); > + break; > + default: > + VLOG_ERR("Action \"%s\" (%d) is not supported", > + name, v[i].val); > + return EOPNOTSUPP; > + } > + } > + > + return 0; > +} > + > +static bool > +is_supported_actions(struct netdev_info *netdev_info, > + const struct nlattr *actions, size_t actions_len) > +{ > + const struct nlattr *a; > + unsigned int left; > + int actions_num = 0; > + > + NL_ATTR_FOR_EACH_UNSAFE(a, left, actions, actions_len) { > + int type = nl_attr_type(a); > + > + if (!(netdev_info->supported_actions & (1 << type))) { > + VLOG_DBG("Unsupported action: %d", type); > + return false; > + } > + actions_num++; > + } > + > + if (actions_num > netdev_info->max_actions) { > + VLOG_DBG("Too many actions: %d", actions_num); > + return false; > + } > + return true; > +} > + > +static int > 
+probe_max_actions(struct netdev_info *netdev_info, struct btf *btf, > + uint32_t type) > +{ > + const struct btf_type *pt, *at; > + const struct btf_array *arr; > + > + pt = btf__type_by_id(btf, type); > + if (!pt) { > + VLOG_ERR("\"max_actions\" field type is unknown"); > + return EINVAL; > + } > + if (!btf_is_ptr(pt)) { > + VLOG_ERR("\"max_actions\" field is not ptr"); > + return EINVAL; > + } > + at = btf__type_by_id(btf, pt->type); > + if (!at) { > + VLOG_ERR("\"max_actions\" ptr type is unknown"); > + return EINVAL; > + } > + if (!btf_is_array(at)) { > + VLOG_ERR("\"max_actions\" field is not array ptr"); > + return EINVAL; > + } > + arr = btf_array(at); > + netdev_info->max_actions = arr->nelems; > + > + return 0; > +} > + > +static int > +probe_meta_info(struct netdev_info *netdev_info, struct btf *btf) > +{ > + int32_t meta_sec_id; > + struct btf_var_secinfo *vi; > + struct btf_member *m; > + const struct btf_type *sec, *t = NULL; > + bool supported_keys_found = false; > + int i; > + > + meta_sec_id = btf__find_by_name_kind(btf, ".ovs_meta", BTF_KIND_DATASEC); > + if (meta_sec_id < 0) { > + VLOG_ERR("BUG: \".ovs_meta\" datasec not found in BTF"); > + return EINVAL; > + } > + > + sec = btf__type_by_id(btf, meta_sec_id); > + for (i = 0, vi = btf_var_secinfos(sec); i < btf_vlen(sec); i++, vi++) { > + const struct btf_type *var = btf__type_by_id(btf, vi->type); > + const char *name; > + > + if (!var) { > + VLOG_ERR("\".ovs_meta\" var #%d type is unknown", i); > + return EINVAL; > + } > + name = btf__name_by_offset(btf, var->name_off); > + if (!name) { > + VLOG_ERR("\".ovs_meta\" var #%d name is empty", i); > + return EINVAL; > + } > + if (strcmp(name, "meta_info")) { > + continue; > + } > + if (!btf_is_var(var)) { > + VLOG_ERR("\"meta_info\" is not var"); > + return EINVAL; > + } > + t = btf__type_by_id(btf, var->type); > + if (!t) { > + VLOG_ERR("\"meta_info\" var type is unknown"); > + return EINVAL; > + } > + break; > + } > + > + if (!t) { > + 
VLOG_ERR("\"meta_info\" var not found in \".ovs_meta\" datasec"); > + return EINVAL; > + } > + > + if (!btf_is_struct(t)) { > + VLOG_ERR("\"meta_info\" is not struct"); > + return EINVAL; > + } > + > + for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) { > + const char *name = btf__name_by_offset(btf, m->name_off); > + int err; > + > + if (!name) { > + VLOG_ERR("Invalid field #%d in \"meta_info\" struct", i); > + return EINVAL; > + } > + if (!strcmp(name, "supported_keys")) { > + err = probe_supported_keys(netdev_info, btf, m->type); > + if (err) { > + return err; > + } > + supported_keys_found = true; > + } else if (!strcmp(name, "supported_actions")) { > + err = probe_supported_actions(netdev_info, btf, m->type); > + if (err) { > + return err; > + } > + } else if (!strcmp(name, "max_actions")) { > + err = probe_max_actions(netdev_info, btf, m->type); > + if (err) { > + return err; > + } > + } else { > + VLOG_ERR("Unsupported meta_info key %s", name); > + return EOPNOTSUPP; > + } > + } > + > + if (!supported_keys_found) { > + VLOG_ERR("\"supported_keys\" field not found in \"meta_info\""); > + return EINVAL; > + } > + if (!netdev_info->supported_actions) { > + VLOG_ERR("\"supported_actions\" field not found in \"meta_info\""); > + return EINVAL; > + } > + if (!netdev_info->max_actions) { > + VLOG_ERR("\"max_actions\" field not found in \"meta_info\""); > + return EINVAL; > + } > + > + return 0; > +} > + > + > +static struct netdev_info * > +find_netdev_info(odp_port_t port) > +{ > + size_t port_hash = hash_bytes(&port, sizeof port, 0); > + struct netdev_info *netdev_info; > + > + HMAP_FOR_EACH_WITH_HASH(netdev_info, port_node, port_hash, > + &netdev_info_table) { > + if (port == netdev_info->port) { > + return netdev_info; > + } > + } > + > + return NULL; > +} > + > +static int > +get_odp_port(struct netdev *netdev, odp_port_t *port) > +{ > + int ifindex; > + > + ifindex = netdev_get_ifindex(netdev); > + if (ifindex < 0) { > + VLOG_ERR_RL(&rl, "Failed to 
get ifindex for %s: %s", > + netdev_get_name(netdev), ovs_strerror(-ifindex)); > + return -ifindex; > + } > + > + *port = netdev_ifindex_to_odp_port(ifindex); > + return 0; > +} > + > +static struct netdev_info * > +get_netdev_info(struct netdev *netdev) > +{ > + struct netdev_info *netdev_info; > + odp_port_t port; > + > + if (get_odp_port(netdev, &port)) { > + return NULL; > + } > + > + netdev_info = find_netdev_info(port); > + if (!netdev_info) { > + VLOG_ERR_RL(&rl, "Failed to find netdev_info for %s", > + netdev_get_name(netdev)); > + } > + > + return netdev_info; > +} > + > + > +/* Convert odp_port to devmap_idx in output action */ > +static int > +convert_port_to_devmap_idx(struct nlattr *actions, size_t actions_len) > +{ > + struct nlattr *a; > + unsigned int left; > + bool output_seen = false; > + > + NL_ATTR_FOR_EACH_UNSAFE(a, left, actions, actions_len) { > + int type = nl_attr_type(a); > + > + if (output_seen) { > + VLOG_DBG("XDP does not support packet copy"); > + return EOPNOTSUPP; > + } > + > + if (type == OVS_ACTION_ATTR_OUTPUT) { > + odp_port_t *port; > + struct netdev_info *netdev_info; > + > + port = CONST_CAST(odp_port_t *, > + nl_attr_get_unspec(a, sizeof(odp_port_t))); > + netdev_info = find_netdev_info(*port); > + if (!netdev_info) { > + VLOG_DBG("Cannot output to port %u without XDP prog > attached", > + *port); > + return EOPNOTSUPP; > + } > + /* XXX: Some NICs cannot handle XDP_REDIRECT'ed packets even with > + * XDP program enabled. 
Linux netdev community is considering > + * adding feature detection in XDP */ > + > + *port = u32_to_odp(netdev_info->devmap_idx); > + output_seen = true; > + } > + } > + > + return 0; > +} > + > +static struct devmap_idx_data * > +find_devmap_idx(int devmap_idx) > +{ > + struct devmap_idx_data *data; > + size_t hash = hash_bytes(&devmap_idx, sizeof devmap_idx, 0); > + > + HMAP_FOR_EACH_WITH_HASH(data, node, hash, &devmap_idx_table) { > + if (devmap_idx == data->devmap_idx) { > + return data; > + } > + } > + > + return NULL; > +} > + > +static int > +get_new_devmap_idx(int *pidx) Can we also use id-pool here? > +{ > + static int max_devmap_idx = 0; why static here? > + int offset; > + > + for (offset = 0; offset < XDP_MAX_PORTS; offset++) { > + int devmap_idx = max_devmap_idx++; > + > + if (max_devmap_idx >= XDP_MAX_PORTS) { > + max_devmap_idx -= XDP_MAX_PORTS; > + } > + > + if (!find_devmap_idx(devmap_idx)) { > + struct devmap_idx_data *data; > + size_t hash = hash_bytes(&devmap_idx, sizeof devmap_idx, 0); > + > + data = xzalloc(sizeof *data); > + data->devmap_idx = devmap_idx; > + hmap_insert(&devmap_idx_table, &data->node, hash); > + > + *pidx = devmap_idx; > + return 0; > + } > + } > + > + return ENOSPC; > +} > + > +static void > +delete_devmap_idx(int devmap_idx) > +{ > + struct devmap_idx_data *data = find_devmap_idx(devmap_idx); > + > + if (data) { > + hmap_remove(&devmap_idx_table, &data->node); > + free(data); > + } > +} > + > + > +static int > +get_table_fd(const struct bpf_object *obj, const char *table_name, > + int *pmap_fd) > +{ > + struct bpf_map *map; > + int map_fd; > + > + map = bpf_object__find_map_by_name(obj, table_name); > + if (!map) { > + VLOG_ERR_RL(&rl, "BPF map \"%s\" not found", table_name); > + return ENOENT; > + } > + > + map_fd = bpf_map__fd(map); > + if (map_fd < 0) { > + VLOG_ERR_RL(&rl, "Invalid BPF map fd: %s", > + ovs_libbpf_strerror(map_fd)); > + return EINVAL; > + } > + > + *pmap_fd = map_fd; > + return 0; > +} > + > +static 
int > +get_subtbl_masks_hd_fd(const struct bpf_object *obj, int *head_fd) > +{ > + return get_table_fd(obj, "subtbl_masks_hd", head_fd); > +} > + > +static int > +get_subtbl_masks_hd(int head_fd, int *head) > +{ > + int err, zero = 0; > + > + if (bpf_map_lookup_elem(head_fd, &zero, head)) { > + err = errno; > + VLOG_ERR_RL(&rl, "Cannot get subtbl_masks_hd: %s", > + ovs_strerror(errno)); > + return err; > + } > + > + return 0; > +} > + > +static int > +update_subtbl_masks_hd(int head_fd, int head) > +{ > + int err, zero = 0; > + > + if (bpf_map_update_elem(head_fd, &zero, &head, 0)) { > + err = errno; > + VLOG_ERR_RL(&rl, "Cannot update subtbl_masks_hd: %s", > + ovs_strerror(errno)); > + return err; > + } > + > + return 0; > +} > + > +static int > +get_subtbl_masks_fd(const struct bpf_object *obj, int *masks_fd) > +{ > + return get_table_fd(obj, "subtbl_masks", masks_fd); > +} > + > +static int > +get_flow_table_fd(const struct bpf_object *obj, int *tables_fd) > +{ > + return get_table_fd(obj, "flow_table", tables_fd); > +} > + > +static int > +get_output_map_fd(const struct bpf_object *obj, int *output_map_fd) > +{ > + return get_table_fd(obj, "output_map", output_map_fd); > +} > + > + > +static int > +netdev_xdp_flow_put(struct netdev *netdev, struct match *match_, > + struct nlattr *actions, size_t actions_len, > + const ovs_u128 *ufid, struct offload_info *info > OVS_UNUSED, > + struct dpif_flow_stats *stats OVS_UNUSED) > +{ > + struct netdev_info *netdev_info; > + struct bpf_object *obj = get_xdp_object(netdev); > + struct minimatch minimatch; > + struct match *match; > + uint32_t key_size; > + size_t fidx; > + uint64_t *flow_u64, *mask_u64, *tmp_values; > + int masks_fd, head_fd, flow_table_fd, subtbl_fd, free_slot, head; > + struct xdp_subtable_mask_header *entry, *pentry; > + struct xdp_flow_actions_header *xdp_actions; > + char subtbl_name[BPF_OBJ_NAME_LEN]; > + size_t hash; > + struct ufid_to_xdp_data *data; > + int cnt, idx, pidx; > + int err; > + > + 
netdev_info = get_netdev_info(netdev); > + if (!netdev_info) { > + return ENOENT; > + } > + > + /* Assume only eth packets on packet reception in XDP */ > + if (match_->wc.masks.packet_type && > + match_->flow.packet_type != htonl(PT_ETH)) { > + VLOG_DBG_RL(&rl, "Packet type not ETH"); > + return EOPNOTSUPP; > + } > + > + /* probe_supported_key() does not support recirculation */ > + if (match_->wc.masks.recirc_id && match_->flow.recirc_id) { > + VLOG_DBG_RL(&rl, "Recirc id not zero"); > + return EOPNOTSUPP; > + } > + > + match = xmemdup(match_, sizeof *match); > + /* XDP only handles packets with packet_type = 0 and recirc_id = 0 so > + * clear masks to reduce max key size */ > + match->wc.masks.packet_type = 0; > + match->wc.masks.recirc_id = 0; > + /* We install per-port XDP classifier table so no need for odp_port */ > + match->wc.masks.in_port.odp_port = 0; > + minimatch_init(&minimatch, match); > + free(match); > + > + key_size = MINIFLOW_VALUES_SIZE(miniflow_n_values(minimatch.flow)); > + if (key_size > netdev_info->key_size) { > + err = EOPNOTSUPP; > + VLOG_DBG_RL(&rl, "Key size too big"); > + goto err; > + } > + > + if (sizeof(struct xdp_flow_actions_header) + actions_len > > + netdev_info->max_actions_len) { > + err = EOPNOTSUPP; > + VLOG_DBG_RL(&rl, "Actions size too big"); > + goto err; > + } > + > + /* XDP only uses masked keys so need to mask the key before adding an > + * entry otherwise table miss unexpectedly happens in XDP */ > + mask_u64 = miniflow_values(&minimatch.mask->masks); > + flow_u64 = miniflow_values(minimatch.flow); > + FLOWMAP_FOR_EACH_INDEX(fidx, minimatch.mask->masks.map) { > + *flow_u64++ &= *mask_u64++; > + } > + > + if (!is_supported_keys(netdev_info, minimatch.mask)) { > + err = EOPNOTSUPP; > + VLOG_DBG_RL(&rl, "Key not supported"); > + goto err; > + } > + > + if (!is_supported_actions(netdev_info, actions, actions_len)) { > + err = EOPNOTSUPP; > + VLOG_DBG_RL(&rl, "Actions not supported"); > + goto err; > + } > + > + /* 
subtables in XDP is hash table whose key is miniflow value and whose > + * value is actions preceded by actions_len */ > + xdp_actions = xzalloc(netdev_info->max_actions_len); > + xdp_actions->actions_len = actions_len; > + memcpy(xdp_flow_actions(xdp_actions), actions, actions_len); > + > + /* TODO: Use XDP_TX for redirect action when possible */ XDP_TX can only forward the packet to one netdev. What happens when the actions have multiple output ports? > + err = convert_port_to_devmap_idx(xdp_flow_actions(xdp_actions), > + actions_len); > + if (err) { > + goto err_actions; > + } > + > + err = get_subtbl_masks_fd(obj, &masks_fd); > + if (err) { > + goto err_actions; > + } > + > + err = get_subtbl_masks_hd_fd(obj, &head_fd); > + if (err) { > + goto err_actions; > + } > + > + err = get_subtbl_masks_hd(head_fd, &head); > + if (err) { > + goto err_actions; > + } > + > + err = get_flow_table_fd(obj, &flow_table_fd); > + if (err) { > + goto err_actions; > + } > + > + entry = xzalloc(netdev_info->subtable_mask_size); > + pentry = xzalloc(netdev_info->subtable_mask_size); > + > + /* Iterate subtable mask list implemented using array */ > + idx = head; > + for (cnt = 0; cnt < netdev_info->max_subtables; cnt++) { > + if (idx == XDP_SUBTABLES_TAIL) { > + break; > + } > + > + if (bpf_map_lookup_elem(masks_fd, &idx, entry)) { > + err = errno; > + VLOG_ERR_RL(&rl, "Cannot lookup subtbl_masks: %s", > + ovs_strerror(errno)); > + goto err_entry; > + } > + > + if (minimask_equal(minimatch.mask, &entry->mask)) { > + __u32 id; > + > + if (bpf_map_lookup_elem(flow_table_fd, &idx, &id)) { > + err = errno; > + VLOG_ERR_RL(&rl, "Cannot lookup flow_table: %s", > + ovs_strerror(errno)); > + goto err_entry; > + } > + > + subtbl_fd = bpf_map_get_fd_by_id(id); > + if (subtbl_fd < 0) { > + err = errno; > + VLOG_ERR_RL(&rl, "Cannot get subtbl fd by id: %s", > + ovs_strerror(errno)); > + goto err_entry; > + } > + > + tmp_values = xzalloc(netdev_info->key_size); > + memcpy(tmp_values, 
miniflow_get_values(minimatch.flow), > key_size); > + if (bpf_map_update_elem(subtbl_fd, tmp_values, xdp_actions, 0)) { > + err = errno; > + VLOG_ERR_RL(&rl, "Cannot insert flow entry: %s", > + ovs_strerror(errno)); > + free(tmp_values); > + goto err_close; > + } > + > + entry->count++; > + if (bpf_map_update_elem(masks_fd, &idx, entry, 0)) { > + err = errno; > + VLOG_ERR_RL(&rl, "Cannot update subtbl_masks count: %s", > + ovs_strerror(errno)); > + bpf_map_delete_elem(subtbl_fd, tmp_values); > + free(tmp_values); > + goto err_close; > + } > + free(tmp_values); > + > + goto out; > + } > + > + memcpy(pentry, entry, netdev_info->subtable_mask_size); > + pidx = idx; > + idx = entry->next; question about this for loop: why do we need to maintain ->next for the struct xdp_subtable_mask_header *entry? I thought we have a fixed-size array of subtables, and all we need to do is go over all the valid indexes of the array until we find a match. Thanks William > + } > + > + if (cnt == netdev_info->max_subtables && idx != XDP_SUBTABLES_TAIL) { > + err = EINVAL; > + VLOG_ERR_RL(&rl, > + "Cannot lookup subtbl_masks: Broken subtbl_masks list"); > + goto err_entry; > + } > + > + /* Subtable was not found. 
Create a new one */ > + > + err = get_subtbl_masks_free_slot(netdev_info, &free_slot); > + if (err) { > + goto err_entry; > + } > + > + miniflow_clone(&entry->mask.masks, &minimatch.mask->masks, > + miniflow_n_values(&minimatch.mask->masks)); > + entry->mf_bits_u0 = count_1bits(minimatch.mask->masks.map.bits[0]); > + entry->mf_bits_u1 = count_1bits(minimatch.mask->masks.map.bits[1]); > + entry->count = 1; > + entry->next = XDP_SUBTABLES_TAIL; > + if (bpf_map_update_elem(masks_fd, &free_slot, entry, 0)) { > + err = errno; > + VLOG_ERR_RL(&rl, "Cannot update subtbl_masks: %s", > + ovs_strerror(errno)); > + goto err_entry; > + } > + > + if (snprintf(subtbl_name, BPF_OBJ_NAME_LEN, "subtbl_%d_%d", > + netdev_info->port, free_slot) < 0) { > + err = errno; > + VLOG_ERR_RL(&rl, "snprintf for subtable name failed: %s", > + ovs_strerror(errno)); > + goto err_entry; > + } > + subtbl_fd = bpf_create_map_name(BPF_MAP_TYPE_HASH, subtbl_name, > + netdev_info->key_size, > + netdev_info->max_actions_len, > + netdev_info->max_entries, 0); > + if (subtbl_fd < 0) { > + err = errno; > + VLOG_ERR_RL(&rl, "map creation for subtbl failed: %s", > + ovs_strerror(errno)); > + goto err_entry; > + } > + > + tmp_values = xzalloc(netdev_info->key_size); > + memcpy(tmp_values, miniflow_get_values(minimatch.flow), key_size); > + if (bpf_map_update_elem(subtbl_fd, tmp_values, xdp_actions, 0)) { > + err = errno; > + VLOG_ERR_RL(&rl, "Cannot insert flow entry: %s", > ovs_strerror(errno)); > + free(tmp_values); > + goto err_close; > + } > + free(tmp_values); > + > + if (bpf_map_update_elem(flow_table_fd, &free_slot, &subtbl_fd, 0)) { > + err = errno; > + VLOG_ERR_RL(&rl, "Failed to insert subtbl into flow_table: %s", > + ovs_strerror(errno)); > + goto err_close; > + } > + > + if (cnt == 0) { > + err = update_subtbl_masks_hd(head_fd, free_slot); > + if (err) { > + goto err_subtbl; > + } > + } else { > + pentry->next = free_slot; > + /* This effectively only updates one byte of entry->next */ > + if 
(bpf_map_update_elem(masks_fd, &pidx, pentry, 0)) { > + err = errno; > + VLOG_ERR_RL(&rl, "Cannot update subtbl_masks prev entry: %s", > + ovs_strerror(errno)); > + goto err_subtbl; > + } > + } > + delete_subtbl_masks_free_slot(netdev_info, free_slot); > +out: > + hash = hash_bytes(ufid, sizeof *ufid, 0); > + data = xzalloc(sizeof *data); > + data->ufid = *ufid; > + miniflow_clone(&data->mask.masks, &minimatch.mask->masks, > + miniflow_n_values(&minimatch.mask->masks)); > + miniflow_clone(&data->flow, minimatch.flow, > + miniflow_n_values(minimatch.flow)); > + ovs_mutex_lock(&ufid_lock); > + hmap_insert(&ufid_to_xdp, &data->ufid_node, hash); > + ovs_mutex_unlock(&ufid_lock); > +err_close: > + close(subtbl_fd); > +err_entry: > + free(pentry); > + free(entry); > +err_actions: > + free(xdp_actions); > +err: > + minimatch_destroy(&minimatch); > + > + return err; > + > +err_subtbl: > + bpf_map_delete_elem(flow_table_fd, &free_slot); > + > + goto err_close; > +} > + > +static int > +netdev_xdp_flow_get(struct netdev *netdev OVS_UNUSED, > + struct match *match OVS_UNUSED, > + struct nlattr **actions OVS_UNUSED, > + const ovs_u128 *ufid OVS_UNUSED, > + struct dpif_flow_stats *stats OVS_UNUSED, > + struct dpif_flow_attrs *attrs OVS_UNUSED, > + struct ofpbuf *buf OVS_UNUSED) > +{ > + /* TODO: Implement this */ > + return 0; > +} > + > +static int > +netdev_xdp_flow_del(struct netdev *netdev, const ovs_u128 *ufid, > + struct dpif_flow_stats *stats OVS_UNUSED) > +{ > + struct netdev_info *netdev_info; > + struct bpf_object *obj = get_xdp_object(netdev); > + size_t hash; > + struct ufid_to_xdp_data *data; > + int masks_fd, head_fd, flow_table_fd, subtbl_fd, head; > + struct xdp_subtable_mask_header *entry, *pentry; > + int err, cnt, idx, pidx; > + __u32 id; > + > + netdev_info = get_netdev_info(netdev); > + if (!netdev_info) { > + return ENOENT; > + } > + > + err = get_subtbl_masks_fd(obj, &masks_fd); > + if (err) { > + return err; > + } > + > + err = 
get_subtbl_masks_hd_fd(obj, &head_fd); > + if (err) { > + return err; > + } > + > + err = get_subtbl_masks_hd(head_fd, &head); > + if (err) { > + return err; > + } > + > + err = get_flow_table_fd(obj, &flow_table_fd); > + if (err) { > + return err; > + } > + > + hash = hash_bytes(ufid, sizeof *ufid, 0); > + ovs_mutex_lock(&ufid_lock); > + HMAP_FOR_EACH_WITH_HASH(data, ufid_node, hash, &ufid_to_xdp) { > + if (ovs_u128_equals(*ufid, data->ufid)) { > + break; > + } > + } > + if (!data) { > + ovs_mutex_unlock(&ufid_lock); > + VLOG_WARN_RL(&rl, "Cannot find flow key to delete"); > + return ENOENT; > + } > + hmap_remove(&ufid_to_xdp, &data->ufid_node); > + ovs_mutex_unlock(&ufid_lock); > + > + entry = xzalloc(netdev_info->subtable_mask_size); > + pentry = xzalloc(netdev_info->subtable_mask_size); > + > + /* Iterate subtable mask list implemented using array */ > + idx = head; > + for (cnt = 0; cnt < netdev_info->max_subtables; cnt++) { > + if (idx == XDP_SUBTABLES_TAIL) { > + err = ENOENT; > + VLOG_ERR_RL(&rl, "Cannot lookup subtbl_masks: %s", > + ovs_strerror(err)); > + goto out; > + } > + > + if (bpf_map_lookup_elem(masks_fd, &idx, entry)) { > + err = errno; > + VLOG_ERR_RL(&rl, "Cannot lookup subtbl_masks: %s", > + ovs_strerror(errno)); > + goto out; > + } > + > + if (minimask_equal(&data->mask, &entry->mask)) { > + break; > + } > + > + memcpy(pentry, entry, netdev_info->subtable_mask_size); > + pidx = idx; > + idx = entry->next; > + } > + > + if (cnt == netdev_info->max_subtables) { > + err = ENOENT; > + VLOG_ERR_RL(&rl, > + "Cannot lookup subtbl_masks: Broken subtbl_masks list"); > + goto out; > + } > + > + if (bpf_map_lookup_elem(flow_table_fd, &idx, &id)) { > + err = errno; > + VLOG_ERR_RL(&rl, "Cannot lookup flow_table: %s", > ovs_strerror(errno)); > + goto out; > + } > + > + subtbl_fd = bpf_map_get_fd_by_id(id); > + if (subtbl_fd < 0) { > + err = errno; > + VLOG_ERR_RL(&rl, "Cannot get subtbl fd by id: %s", > + ovs_strerror(errno)); > + goto out; > + } > + > + 
bpf_map_delete_elem(subtbl_fd, miniflow_get_values(&data->flow)); > + close(subtbl_fd); > + > + if (--entry->count > 0) { > + if (bpf_map_update_elem(masks_fd, &idx, entry, 0)) { > + err = errno; > + VLOG_ERR_RL(&rl, "Cannot update subtbl_masks count: %s", > + ovs_strerror(errno)); > + } > + > + goto out; > + } > + > + if (entry->count == (uint16_t)-1) { > + VLOG_WARN_RL(&rl, "subtbl_masks has negative count: %d", > + entry->count); > + } > + > + if (cnt == 0) { > + err = update_subtbl_masks_hd(head_fd, entry->next); > + if (err) { > + goto out; > + } > + } else { > + pentry->next = entry->next; > + /* This effectively only updates one byte of entry->next */ > + if (bpf_map_update_elem(masks_fd, &pidx, pentry, 0)) { > + err = errno; > + VLOG_ERR_RL(&rl, "Cannot update subtbl_masks prev entry: %s", > + ovs_strerror(errno)); > + goto out; > + } > + } > + > + bpf_map_delete_elem(flow_table_fd, &idx); > + err = add_subtbl_masks_free_slot(netdev_info, idx); > + if (err) { > + VLOG_ERR_RL(&rl, "Cannot add subtbl_masks free slot: %s", > + ovs_strerror(err)); > + } > +out: > + free(data); > + free(pentry); > + free(entry); > + > + return err; > +} > + > +static int > +netdev_xdp_init_flow_api(struct netdev *netdev) > +{ > + struct bpf_object *obj; > + struct btf *btf; > + struct netdev_info *netdev_info; > + struct bpf_map *flow_table, *subtbl_template, *subtbl_masks; > + const struct bpf_map_def *flow_table_def, *subtbl_def, *subtbl_masks_def; > + odp_port_t port; > + size_t port_hash; > + int output_map_fd; > + int err, ifindex, devmap_idx; > + > + if (!has_xdp_flowtable(netdev)) { > + return EOPNOTSUPP; > + } > + > + ifindex = netdev_get_ifindex(netdev); > + if (ifindex < 0) { > + VLOG_ERR("Failed to get ifindex for %s: %s", > + netdev_get_name(netdev), ovs_strerror(-ifindex)); > + return -ifindex; > + } > + port = netdev_ifindex_to_odp_port(ifindex); > + > + netdev_info = find_netdev_info(port); > + if (netdev_info) { > + VLOG_ERR("xdp offload is already initialized 
for netdev %s", > + netdev_get_name(netdev)); > + return EEXIST; > + } > + > + obj = get_xdp_object(netdev); > + err = get_new_devmap_idx(&devmap_idx); > + if (err) { > + VLOG_ERR("Failed to get new devmap idx: %s", ovs_strerror(err)); > + return err; > + } > + > + netdev_info = xzalloc(sizeof *netdev_info); > + netdev_info->devmap_idx = devmap_idx; > + > + if (get_odp_port(netdev, &netdev_info->port) || !netdev_info->port) { > + VLOG_ERR("Failed to get odp_port for %s", netdev_get_name(netdev)); > + err = ENOENT; > + goto err; > + } > + > + btf = bpf_object__btf(obj); > + if (!btf) { > + VLOG_ERR("BUG: BPF object for netdev \"%s\" does not contain BTF", > + netdev_get_name(netdev)); > + err = EINVAL; > + goto err; > + } > + > + err = probe_meta_info(netdev_info, btf); > + if (err) { > + VLOG_ERR("Failed to initialize xdp offload metadata for %s", > + netdev_get_name(netdev)); > + goto err; > + } > + > + flow_table = bpf_object__find_map_by_name(obj, "flow_table"); > + if (!flow_table) { > + VLOG_ERR("BUG: BPF map \"flow_table\" not found"); > + err = ENOENT; > + goto err; > + } > + flow_table_def = bpf_map__def(flow_table); > + if (flow_table_def->max_entries > XDP_MAX_SUBTABLES) { > + VLOG_ERR("flow_table max_entries must not be greater than %d", > + XDP_MAX_SUBTABLES); > + goto err; > + } > + netdev_info->max_subtables = flow_table_def->max_entries; > + > + subtbl_template = bpf_object__find_map_by_name(obj, "subtbl_template"); > + if (!subtbl_template) { > + VLOG_ERR("BUG: BPF map \"subtbl_template\" not found"); > + err = ENOENT; > + goto err; > + } > + subtbl_def = bpf_map__def(subtbl_template); > + netdev_info->key_size = subtbl_def->key_size; > + netdev_info->max_actions_len = subtbl_def->value_size; > + netdev_info->max_entries = subtbl_def->max_entries; > + > + subtbl_masks = bpf_object__find_map_by_name(obj, "subtbl_masks"); > + if (!subtbl_masks) { > + VLOG_ERR("BPF map \"subtbl_masks\" not found"); > + err = ENOENT; > + goto err; > + } > + 
subtbl_masks_def = bpf_map__def(subtbl_masks); > + if (subtbl_masks_def->max_entries != netdev_info->max_subtables) { > + VLOG_ERR("\"subtbl_masks\" map has different max_entries from > \"flow_table\""); > + goto err; > + } > + netdev_info->subtable_mask_size = subtbl_masks_def->value_size; > + init_subtbl_masks_free_slot(netdev_info); > + > + err = get_output_map_fd(obj, &output_map_fd); > + if (err) { > + goto err; > + } > + if (bpf_map_update_elem(output_map_fd, &devmap_idx, &ifindex, 0)) { > + err = errno; > + VLOG_ERR("Failed to insert idx %d if %s into output_map: %s", > + devmap_idx, netdev_get_name(netdev), ovs_strerror(errno)); > + goto err; > + } > + > + port_hash = hash_bytes(&port, sizeof port, 0); > + hmap_insert(&netdev_info_table, &netdev_info->port_node, port_hash); > + > + return 0; > +err: > + free(netdev_info); > + delete_devmap_idx(devmap_idx); > + return err; > +} > + > +static void > +netdev_xdp_uninit_flow_api(struct netdev *netdev) > +{ > + struct bpf_object *obj; > + struct netdev_info *netdev_info; > + int output_map_fd, devmap_idx; > + > + netdev_info = get_netdev_info(netdev); > + if (!netdev_info) { > + VLOG_WARN("%s: netdev_info not found on uninitializing xdp flow api", > + netdev_get_name(netdev)); > + return; > + } > + hmap_remove(&netdev_info_table, &netdev_info->port_node); > + > + devmap_idx = netdev_info->devmap_idx; > + obj = get_xdp_object(netdev); > + if (!get_output_map_fd(obj, &output_map_fd)) { > + bpf_map_delete_elem(output_map_fd, &devmap_idx); > + } else { > + VLOG_WARN("%s: Failed to get output_map fd on uninitializing xdp > flow api", > + netdev_get_name(netdev)); > + } > + > + free(netdev_info); > + delete_devmap_idx(devmap_idx); > +} > + > +const struct netdev_flow_api netdev_offload_xdp = { > + .type = "linux_xdp", > + .flow_put = netdev_xdp_flow_put, > + .flow_get = netdev_xdp_flow_get, > + .flow_del = netdev_xdp_flow_del, > + .init_flow_api = netdev_xdp_init_flow_api, > + .uninit_flow_api = 
netdev_xdp_uninit_flow_api, > +}; > diff --git a/lib/netdev-offload-xdp.h b/lib/netdev-offload-xdp.h > new file mode 100644 > index 000000000..800a0c0e5 > --- /dev/null > +++ b/lib/netdev-offload-xdp.h > @@ -0,0 +1,49 @@ > +/* > + * Copyright (c) 2020 NTT Corp. > + * > + * Licensed under the Apache License, Version 2.0 (the "License"); > + * you may not use this file except in compliance with the License. > + * You may obtain a copy of the License at: > + * > + * http://www.apache.org/licenses/LICENSE-2.0 > + * > + * Unless required by applicable law or agreed to in writing, software > + * distributed under the License is distributed on an "AS IS" BASIS, > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. > + * See the License for the specific language governing permissions and > + * limitations under the License. > + */ > + > +#ifndef NETDEV_OFFLOAD_XDP_H > +#define NETDEV_OFFLOAD_XDP_H 1 > + > +#include "flow.h" > + > +#define XDP_MAX_PORTS 65536 > +/* XDP_MAX_SUBTABLES must be <= 255 to fit in 1 byte with 1 value reserved > + * for TAIL */ > +#define XDP_MAX_SUBTABLES 128 > +#define XDP_SUBTABLES_TAIL XDP_MAX_SUBTABLES > + > +struct xdp_subtable_mask_header { > + uint16_t count; > + uint8_t mf_bits_u0; > + uint8_t mf_bits_u1; > + int next; > + struct minimask mask; > +}; > + > +struct xdp_flow_actions_header { > + size_t actions_len; > + /* Followed by netlink attributes (actions) */ > +}; > + > +struct nlattr; > + > +static inline struct nlattr * > +xdp_flow_actions(struct xdp_flow_actions_header *header) > +{ > + return (struct nlattr *)(header + 1); > +} > + > +#endif /* netdev-offload-xdp.h */ > diff --git a/lib/netdev-offload.c b/lib/netdev-offload.c > index 669513646..f14f77420 100644 > --- a/lib/netdev-offload.c > +++ b/lib/netdev-offload.c > @@ -185,6 +185,9 @@ netdev_assign_flow_api(struct netdev *netdev) > if (!current_rfa || > (!netdev_flow_api_driver && > !strcmp(FLOW_API_DRIVER_DEFAULT, rfa->flow_api->type))) { > + if 
(current_rfa && current_rfa->flow_api->uninit_flow_api) { > + current_rfa->flow_api->uninit_flow_api(netdev); > + } > current_rfa = rfa; > } > } else { > @@ -326,6 +329,9 @@ netdev_uninit_flow_api(struct netdev *netdev) > > ovsrcu_set(&netdev->flow_api, NULL); > rfa = netdev_lookup_flow_api(flow_api->type); > + if (rfa->flow_api->uninit_flow_api) { > + rfa->flow_api->uninit_flow_api(netdev); > + } > ovs_refcount_unref(&rfa->refcnt); > } > > -- > 2.25.1 > _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev