On 2/24/2023 4:16 AM, Ilya Maximets wrote:
On 2/23/23 12:26, Chris Mi wrote:
Initialize psample socket. Add sFlow recv API to receive sampled
packets from psample socket. Add sFlow recv wait API to add psample
socket fd to poll list.
Signed-off-by: Chris Mi<c...@nvidia.com>
Reviewed-by: Roi Dayan<r...@nvidia.com>
---
lib/dpif.h | 7 ++
lib/netdev-offload-provider.h | 11 ++
lib/netdev-offload-tc.c | 188 ++++++++++++++++++++++++++++++++++
3 files changed, 206 insertions(+)
diff --git a/lib/dpif.h b/lib/dpif.h
index 6cb4dae6d..95807af8f 100644
--- a/lib/dpif.h
+++ b/lib/dpif.h
@@ -836,6 +836,13 @@ struct dpif_upcall {
struct nlattr *userdata; /* Argument to OVS_ACTION_ATTR_USERSPACE. */
struct nlattr *out_tun_key; /* Output tunnel key. */
struct nlattr *actions; /* Argument to OVS_ACTION_ATTR_USERSPACE. */
+ /* SFlow offload only.
+ * When receiving sampled packets from psample socket, there is no
+ * flow key. But input tunnel and input ifindex are available. They
+ * are enough to construct flow and continue to process sFlow.
+ */
+ struct flow_tnl *in_tun; /* Input tunnel key. */
+ uint32_t iifindex; /* Input ifindex. */
These should be replaced with a struct flow pointer. See the comments
on patch #6.
And we don't need offload-specific comments. We just need to allow
datapath implementations to return either key or struct flow.
Done.
};
/* A callback to notify higher layer of dpif about to be purged, so that
diff --git a/lib/netdev-offload-provider.h b/lib/netdev-offload-provider.h
index 9108856d1..9e2722fd1 100644
--- a/lib/netdev-offload-provider.h
+++ b/lib/netdev-offload-provider.h
@@ -121,6 +121,17 @@ struct netdev_flow_api {
int (*meter_del)(ofproto_meter_id meter_id,
struct ofputil_meter_stats *stats);
+ /* Receives sampled packets in 'buf' from psample socket and fill the
+ * necessary members in 'upcall'.
+ * Return 0 if successful, otherwise returns a positive errno value.
+ */
+ int (*sflow_recv)(struct dpif_upcall *upcall, struct ofpbuf *buf);
+
+ /* Add psample socket fd to poll list. Wake the upcall thread up to
+ * process it if there are any sampled packets.
+ */
+ void (*sflow_recv_wait)(void);
These should be generic callbacks, not tied to psample or sflow.
There might be other reasons offload implementation wants to
send a packet to userspace. Also, psample doesn't make sense for
other offload implementations. So, just recv() and recv_wait().
Done.
+
/* Initializies the netdev flow api.
* Return 0 if successful, otherwise returns a positive errno value. */
int (*init_flow_api)(struct netdev *);
diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
index 0dbb7954f..d7901fa68 100644
--- a/lib/netdev-offload-tc.c
+++ b/lib/netdev-offload-tc.c
@@ -18,6 +18,8 @@
#include <errno.h>
#include <linux/if_ether.h>
+#include <linux/psample.h>
+#include <sys/poll.h>
Just <poll.h>, no sys.
Done.
#include "dpif.h"
#include "hash.h"
@@ -104,6 +106,9 @@ static void parse_tc_flower_to_stats(struct tc_flower
*flower,
static int get_ufid_adjust_stats(const ovs_u128 *ufid,
struct dpif_flow_stats *stats);
+static struct nl_sock *psample_sock;
+static int psample_family;
+
/* When offloading sample action to TC, userspace creates a unique ID
* to map sFlow action and tunnel info and passes this ID to kernel
* instead of the sFlow info. Psample will send this ID and sampled
@@ -151,6 +156,19 @@ sgid_find(uint32_t id)
return node ? CONTAINER_OF(node, const struct sgid_node, id_node) : NULL;
}
+static struct offload_sflow *
+sflow_find(uint32_t id)
+{
+ const struct sgid_node *node;
+
+ node = sgid_find(id);
+ if (!node) {
+ return NULL;
+ }
+
+ return CONST_CAST(struct offload_sflow *, &node->sflow);
+}
+
static uint32_t
dpif_sflow_hash(const struct offload_sflow *sflow)
{
@@ -3015,6 +3033,55 @@ tc_cleanup_policer_actions(struct id_pool *police_ids,
hmap_destroy(&map);
}
+static void
+psample_init(void)
+{
+ unsigned int psample_mcgroup;
+ int err;
+
+ if (!netdev_is_flow_api_enabled()) {
+ VLOG_DBG("Flow API is not enabled.");
+ return;
+ }
+
+ if (psample_sock) {
+ VLOG_DBG("Psample socket is already initialized.");
+ return;
+ }
+
+ err = nl_lookup_genl_family(PSAMPLE_GENL_NAME,
+ &psample_family);
+ if (err) {
+ VLOG_WARN("Generic Netlink family '%s' does not exist: %s\n"
+ "Please make sure the kernel module psample is loaded.",
+ PSAMPLE_GENL_NAME, ovs_strerror(err));
+ return;
+ }
+
+ err = nl_lookup_genl_mcgroup(PSAMPLE_GENL_NAME,
+ PSAMPLE_NL_MCGRP_SAMPLE_NAME,
+ &psample_mcgroup);
+ if (err) {
+ VLOG_WARN("Failed to join Netlink multicast group '%s': %s",
+ PSAMPLE_NL_MCGRP_SAMPLE_NAME, ovs_strerror(err));
+ return;
+ }
+
+ err = nl_sock_create(NETLINK_GENERIC, &psample_sock);
+ if (err) {
+ VLOG_WARN("Failed to create psample socket: %s", ovs_strerror(err));
+ return;
+ }
+
+ err = nl_sock_join_mcgroup(psample_sock, psample_mcgroup);
+ if (err) {
+ VLOG_WARN("Failed to join psample mcgroup: %s", ovs_strerror(err));
+ nl_sock_destroy(psample_sock);
+ psample_sock = NULL;
+ return;
+ }
+}
+
static int
netdev_tc_init_flow_api(struct netdev *netdev)
{
@@ -3069,6 +3136,7 @@ netdev_tc_init_flow_api(struct netdev *netdev)
meter_police_ids = id_pool_create(METER_POLICE_IDS_BASE,
METER_POLICE_IDS_MAX - METER_POLICE_IDS_BASE + 1);
sample_group_ids = id_pool_create(1, UINT32_MAX - 1);
+ psample_init();
Having 2 lines above under meter_police_ids_mutex doesn't make
a lot of sense.
I moved it to the end.
tc_cleanup_policer_actions(meter_police_ids, METER_POLICE_IDS_BASE,
METER_POLICE_IDS_MAX);
ovs_mutex_unlock(&meter_police_ids_mutex);
@@ -3288,6 +3356,124 @@ meter_tc_del_policer(ofproto_meter_id meter_id,
return err;
}
+struct offload_psample {
+ struct nlattr *packet; /* Packet data. */
+ int group_id; /* Mapping id for sFlow offload. */
+ int iifindex; /* Input ifindex. */
+};
+
+static int
+psample_from_ofpbuf(struct offload_psample *psample,
+ struct ofpbuf *buf)
+{
+ static const struct nl_policy ovs_psample_policy[] = {
+ [PSAMPLE_ATTR_IIFINDEX] = { .type = NL_A_U16 },
+ [PSAMPLE_ATTR_SAMPLE_GROUP] = { .type = NL_A_U32 },
+ [PSAMPLE_ATTR_GROUP_SEQ] = { .type = NL_A_U32 },
+ [PSAMPLE_ATTR_DATA] = { .type = NL_A_UNSPEC },
+ };
+ struct nlattr *a[ARRAY_SIZE(ovs_psample_policy)];
+ struct genlmsghdr *genl;
+ struct nlmsghdr *nlmsg;
+ struct ofpbuf b;
+
+ b = ofpbuf_const_initializer(buf->data, buf->size);
+ nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
+ genl = ofpbuf_try_pull(&b, sizeof *genl);
+ if (!nlmsg || !genl || nlmsg->nlmsg_type != psample_family
+ || !nl_policy_parse(&b, 0, ovs_psample_policy, a,
+ ARRAY_SIZE(ovs_psample_policy))) {
+ return EINVAL;
+ }
+
+ psample->iifindex = nl_attr_get_u16(a[PSAMPLE_ATTR_IIFINDEX]);
+ psample->group_id = nl_attr_get_u32(a[PSAMPLE_ATTR_SAMPLE_GROUP]);
+ psample->packet = a[PSAMPLE_ATTR_DATA];
+
+ return 0;
+}
+
+static int
+psample_parse_packet(struct offload_psample *psample,
+ struct offload_sflow *sflow,
Why is 'sflow' an argument here?
Removed.
+ struct dpif_upcall *upcall)
+{
+ dp_packet_use_stub(&upcall->packet,
+ CONST_CAST(struct nlattr *,
+ nl_attr_get(psample->packet)) - 1,
+ nl_attr_get_size(psample->packet) +
+ sizeof(struct nlattr));
+ dp_packet_set_data(&upcall->packet,
+ (char *) dp_packet_data(&upcall->packet) +
+ sizeof(struct nlattr));
+ dp_packet_set_size(&upcall->packet, nl_attr_get_size(psample->packet));
+
+ sflow = sflow_find(psample->group_id);
+ if (!sflow) {
+ return ENOENT;
+ }
+
+ upcall->key = NULL;
+ upcall->key_len = 0;
+ upcall->ufid = sflow->ufid;
+ upcall->userdata = sflow->userdata;
+ upcall->actions = CONST_CAST(struct nlattr *, sflow->actions);
+ upcall->in_tun = sflow->tunnel;
+ upcall->iifindex = psample->iifindex;
+ upcall->type = DPIF_UC_ACTION;
+
+ return 0;
+}
+
+static int
+sflow_psample_recv(struct dpif_upcall *upcall, struct ofpbuf *buf)
Handler id should be an argument here. And we should check that
it is zero. Upper layers do not/should not know how many sockets
we have here.
And 'sflow' should probably not be in the name.
Done.
+{
+ int read_tries = 0;
+
+ if (!psample_sock) {
+ return ENOENT;
+ }
+
+ for (;;) {
+ struct offload_psample psample;
+ struct offload_sflow sflow;
+ int error;
+
+ if (++read_tries > 50) {
+ return EAGAIN;
+ }
+
+ error = nl_sock_recv(psample_sock, buf, NULL, false);
+ if (error == ENOBUFS) {
+ continue;
+ }
+
+ if (error) {
+ if (error == EAGAIN) {
+ break;
+ }
+ return error;
+ }
+
+ error = psample_from_ofpbuf(&psample, buf);
+ if (!error) {
+ return psample_parse_packet(&psample, &sflow, upcall);
+ } else if (error) {
Condition here is always true.
I copied from dpif_netlink_recv_cpu_dispatch(). And I also think it is ok.
+ return error;
+ }
+ }
+
+ return EAGAIN;
+}
+
+static void
+sflow_psample_recv_wait(void)
Same. Handler id. Name.
Done.
+{
+ if (psample_sock) {
+ nl_sock_wait(psample_sock, POLLIN);
+ }
+}
+
const struct netdev_flow_api netdev_offload_tc = {
.type = "linux_tc",
.flow_flush = netdev_tc_flow_flush,
@@ -3301,5 +3487,7 @@ const struct netdev_flow_api netdev_offload_tc = {
.meter_set = meter_tc_set_policer,
.meter_get = meter_tc_get_policer,
.meter_del = meter_tc_del_policer,
+ .sflow_recv = sflow_psample_recv,
+ .sflow_recv_wait = sflow_psample_recv_wait,
.init_flow_api = netdev_tc_init_flow_api,
};
_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev