Initialize psample socket. Add sample recv API to receive sampled packets from psample socket. Add sample recv wait API to add psample socket fd to poll list.
Signed-off-by: Chris Mi <c...@nvidia.com> Reviewed-by: Roi Dayan <r...@nvidia.com> --- lib/dpif.h | 6 +- lib/flow.h | 2 +- lib/netdev-offload-provider.h | 30 ++++++ lib/netdev-offload-tc.c | 173 ++++++++++++++++++++++++++++++++++ lib/netdev-offload.c | 3 +- lib/packets.h | 2 +- 6 files changed, 211 insertions(+), 5 deletions(-) diff --git a/lib/dpif.h b/lib/dpif.h index 0f2dc2ef3..7a0ea19e9 100644 --- a/lib/dpif.h +++ b/lib/dpif.h @@ -836,8 +836,10 @@ struct dpif_upcall { /* DPIF_UC_ACTION only. */ struct nlattr *userdata; /* Argument to OVS_ACTION_ATTR_USERSPACE. */ - struct nlattr *out_tun_key; /* Output tunnel key. */ - struct nlattr *actions; /* Argument to OVS_ACTION_ATTR_USERSPACE. */ + struct nlattr *out_tun_key; /* Output tunnel key. */ + struct nlattr *actions; /* Argument to OVS_ACTION_ATTR_USERSPACE. */ + struct flow flow; /* Caller provided 'flow' if the 'key' is not + available. */ }; /* A callback to notify higher layer of dpif about to be purged, so that diff --git a/lib/flow.h b/lib/flow.h index 75a9be3c1..626986182 100644 --- a/lib/flow.h +++ b/lib/flow.h @@ -970,7 +970,7 @@ pkt_metadata_from_flow(struct pkt_metadata *md, const struct flow *flow) md->recirc_id = flow->recirc_id; md->dp_hash = flow->dp_hash; - flow_tnl_copy__(&md->tunnel, &flow->tunnel); + flow_tnl_copy(&md->tunnel, &flow->tunnel); md->skb_priority = flow->skb_priority; md->pkt_mark = flow->pkt_mark; md->in_port = flow->in_port; diff --git a/lib/netdev-offload-provider.h b/lib/netdev-offload-provider.h index 9108856d1..53c272d07 100644 --- a/lib/netdev-offload-provider.h +++ b/lib/netdev-offload-provider.h @@ -28,6 +28,8 @@ extern "C" { #endif +struct dpif_upcall; + struct netdev_flow_api { char *type; /* Flush all offloaded flows from a netdev. @@ -121,6 +123,34 @@ struct netdev_flow_api { int (*meter_del)(ofproto_meter_id meter_id, struct ofputil_meter_stats *stats); + /* Polls for upcall offload packets for an upcall handler. If successful, + * stores the upcall into '*upcall', using 'buf' for storage. + * + * The implementation should point '&upcall->flow' and 'upcall->userdata' + * (if any) into data in the caller-provided 'buf'. The implementation may + * also use 'buf' for storing the data of 'upcall->packet'. If necessary + * to make room, the implementation may reallocate the data in 'buf'. + * + * The caller owns the data of 'upcall->packet' and may modify it. If + * packet's headroom is exhausted as it is manipulated, 'upcall->packet' + * will be reallocated. This requires the data of 'upcall->packet' to be + * released with ofpbuf_uninit() before 'upcall' is destroyed. However, + * when an error is returned, the 'upcall->packet' may be uninitialized + * and should not be released. + * + * This function must not block. If no upcall is pending when it is + * called, it should return EAGAIN without blocking. + * + * Return 0 if successful, otherwise returns a positive errno value. + */ + int (*recv)(struct dpif_upcall *upcall, struct ofpbuf *buf, + uint32_t handler_id); + + /* Arranges for the poll loop for an upcall handler to wake up when + * offload provider has a message queued to be received with the recv + * member functions. */ + void (*recv_wait)(uint32_t handler_id); + /* Initializies the netdev flow api. * Return 0 if successful, otherwise returns a positive errno value. */ int (*init_flow_api)(struct netdev *); diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c index bdf111942..12a0556b2 100644 --- a/lib/netdev-offload-tc.c +++ b/lib/netdev-offload-tc.c @@ -18,6 +18,8 @@ #include <errno.h> #include <linux/if_ether.h> +#include <linux/psample.h> +#include <poll.h> #include "cmap.h" #include "dpif-provider.h" @@ -35,6 +37,7 @@ #include "openvswitch/hmap.h" #include "openvswitch/match.h" #include "openvswitch/ofpbuf.h" +#include "openvswitch/poll-loop.h" #include "openvswitch/thread.h" #include "openvswitch/types.h" #include "openvswitch/util.h" @@ -126,6 +129,9 @@ struct sgid_node { struct offload_sample sample; }; +static struct nl_sock *psample_sock; +static int psample_family; + /* The sgid_map mutex protects the sample_group_ids and the sgid_map for * cmap_insert(), cmap_remove(), or cmap_replace() operations. */ static struct ovs_mutex sgid_lock = OVS_MUTEX_INITIALIZER; @@ -157,6 +163,14 @@ sgid_find(uint32_t id) return node ? CONTAINER_OF(node, struct sgid_node, id_node) : NULL; } +static struct offload_sample * +sample_find(uint32_t id) +{ + struct sgid_node *node = sgid_find(id); + + return node ? &node->sample: NULL; +} + static void offload_sample_clone(struct offload_sample *dst, const struct offload_sample *src, @@ -3074,6 +3088,55 @@ tc_cleanup_policer_actions(struct id_pool *police_ids, hmap_destroy(&map); } +static void +psample_init(void) +{ + unsigned int psample_mcgroup; + int err; + + if (!netdev_is_flow_api_enabled()) { + VLOG_DBG("Flow API is not enabled"); + return; + } + + if (psample_sock) { + VLOG_DBG("Psample socket is already initialized"); + return; + } + + err = nl_lookup_genl_family(PSAMPLE_GENL_NAME, + &psample_family); + if (err) { + VLOG_INFO("Generic Netlink family '%s' does not exist: %s\n" + "Please make sure the kernel module psample is loaded", + PSAMPLE_GENL_NAME, ovs_strerror(err)); + return; + } + + err = nl_lookup_genl_mcgroup(PSAMPLE_GENL_NAME, + PSAMPLE_NL_MCGRP_SAMPLE_NAME, + &psample_mcgroup); + if (err) { + VLOG_INFO("Failed to join Netlink multicast group '%s': %s", + PSAMPLE_NL_MCGRP_SAMPLE_NAME, ovs_strerror(err)); + return; + } + + err = nl_sock_create(NETLINK_GENERIC, &psample_sock); + if (err) { + VLOG_INFO("Failed to create psample socket: %s", ovs_strerror(err)); + return; + } + + err = nl_sock_join_mcgroup(psample_sock, psample_mcgroup); + if (err) { + VLOG_INFO("Failed to join psample mcgroup: %s", ovs_strerror(err)); + nl_sock_destroy(psample_sock); + psample_sock = NULL; + return; + } +} + static int netdev_tc_init_flow_api(struct netdev *netdev) { @@ -3134,6 +3197,7 @@ netdev_tc_init_flow_api(struct netdev *netdev) ovs_mutex_lock(&sgid_lock); sample_group_ids = id_pool_create(1, UINT32_MAX - 1); ovs_mutex_unlock(&sgid_lock); + psample_init(); ovsthread_once_done(&once); } @@ -3351,6 +3415,113 @@ meter_tc_del_policer(ofproto_meter_id meter_id, return err; } +struct offload_psample { + struct nlattr *packet; /* Packet data. */ + uint32_t group_id; /* Mapping id for sample offload. */ +}; + +static int +nl_parse_psample(struct offload_psample *psample, struct ofpbuf *buf) +{ + static const struct nl_policy ovs_psample_policy[] = { + [PSAMPLE_ATTR_SAMPLE_GROUP] = { .type = NL_A_U32 }, + [PSAMPLE_ATTR_DATA] = { .type = NL_A_UNSPEC }, + }; + struct nlattr *a[ARRAY_SIZE(ovs_psample_policy)]; + struct genlmsghdr *genl; + struct nlmsghdr *nlmsg; + struct ofpbuf b; + + b = ofpbuf_const_initializer(buf->data, buf->size); + nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg); + genl = ofpbuf_try_pull(&b, sizeof *genl); + if (!nlmsg || !genl || nlmsg->nlmsg_type != psample_family + || !nl_policy_parse(&b, 0, ovs_psample_policy, a, + ARRAY_SIZE(ovs_psample_policy))) { + return EINVAL; + } + + psample->group_id = nl_attr_get_u32(a[PSAMPLE_ATTR_SAMPLE_GROUP]); + psample->packet = a[PSAMPLE_ATTR_DATA]; + + return 0; +} + +static int +psample_parse_packet(struct offload_psample *psample, + struct dpif_upcall *upcall) +{ + struct flow *flow = &upcall->flow; + struct offload_sample *sample; + + memset(upcall, 0, sizeof *upcall); + dp_packet_use_const(&upcall->packet, + nl_attr_get(psample->packet), + nl_attr_get_size(psample->packet)); + + sample = sample_find(psample->group_id); + if (!sample) { + VLOG_ERR_RL(&error_rl, "Failed to get sample info via group id: %d", + psample->group_id); + return ENOENT; + } + + upcall->userdata = sample->userdata; + if (sample->tunnel) { + flow_tnl_copy(&flow->tunnel, sample->tunnel); + } + if (sample->userspace_actions) { + upcall->actions = sample->userspace_actions; + } + flow->in_port.odp_port = netdev_ifindex_to_odp_port(sample->ifindex); + upcall->type = DPIF_UC_ACTION; + + return 0; +} + +static int +netdev_tc_recv(struct dpif_upcall *upcall, struct ofpbuf *buf, + uint32_t handler_id) +{ + int read_tries = 0; + + if (handler_id || !psample_sock) { + return EAGAIN; + } + + for (;;) { + struct offload_psample psample; + int error; + + if (++read_tries > 50) { + return EAGAIN; + } + + error = nl_sock_recv(psample_sock, buf, NULL, false); + if (error == ENOBUFS) { + continue; + } + if (error) { + return error; + } + error = nl_parse_psample(&psample, buf); + + return error ? error : psample_parse_packet(&psample, upcall); + } + + return EAGAIN; +} + +static void +netdev_tc_recv_wait(uint32_t handler_id) +{ + /* For simplicity, i.e., using a single NetLink socket, only the first + * handler thread will be used. */ + if (!handler_id && psample_sock) { + poll_fd_wait(nl_sock_fd(psample_sock), POLLIN); + } +} + const struct netdev_flow_api netdev_offload_tc = { .type = "linux_tc", .flow_flush = netdev_tc_flow_flush, @@ -3364,5 +3535,7 @@ const struct netdev_flow_api netdev_offload_tc = { .meter_set = meter_tc_set_policer, .meter_get = meter_tc_get_policer, .meter_del = meter_tc_del_policer, + .recv = netdev_tc_recv, + .recv_wait = netdev_tc_recv_wait, .init_flow_api = netdev_tc_init_flow_api, }; diff --git a/lib/netdev-offload.c b/lib/netdev-offload.c index 931d634e1..5aad804c1 100644 --- a/lib/netdev-offload.c +++ b/lib/netdev-offload.c @@ -38,6 +38,7 @@ #include "netdev-provider.h" #include "netdev-vport.h" #include "odp-netlink.h" +#include "odp-util.h" #include "openflow/openflow.h" #include "packets.h" #include "openvswitch/ofp-print.h" @@ -826,7 +827,7 @@ odp_port_t netdev_ifindex_to_odp_port(int ifindex) { struct port_to_netdev_data *data; - odp_port_t ret = 0; + odp_port_t ret = ODPP_NONE; ovs_rwlock_rdlock(&ifindex_to_port_rwlock); HMAP_FOR_EACH_WITH_HASH (data, ifindex_node, ifindex, &ifindex_to_port) { diff --git a/lib/packets.h b/lib/packets.h index 8b6994809..b50ee00df 100644 --- a/lib/packets.h +++ b/lib/packets.h @@ -86,7 +86,7 @@ flow_tnl_size(const struct flow_tnl *src) * data in 'dst' is NOT cleared, so this must not be used in cases where the * uninitialized portion may be hashed over. */ static inline void -flow_tnl_copy__(struct flow_tnl *dst, const struct flow_tnl *src) +flow_tnl_copy(struct flow_tnl *dst, const struct flow_tnl *src) { memcpy(dst, src, flow_tnl_size(src)); } -- 2.37.1 _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev