new file mode 100644
index 000000000..02aea7e2d
--- /dev/null
+++ b/lib/dpif-offload-netlink.c
@@ -0,0 +1,221 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+#include <errno.h>
+#include <linux/psample.h>
+#include <sys/poll.h>
+
+#include "dpif-offload-provider.h"
+#include "netdev-offload.h"
+#include "netlink-protocol.h"
+#include "netlink-socket.h"
+#include "openvswitch/vlog.h"
+
+VLOG_DEFINE_THIS_MODULE(dpif_offload_netlink);
+
+static struct nl_sock *psample_sock; /* Socket that psample_init() joins to
+                                      * the psample sample multicast group;
+                                      * NULL while no such socket is open. */
+static int psample_family; /* Generic Netlink family number looked up for
+                            * PSAMPLE_GENL_NAME; received messages must carry
+                            * it as their nlmsg_type. */
+
+/* Attributes extracted from one received psample Netlink message.
+ *
+ * 'packet' points into the buffer that held the message, so it is only valid
+ * for as long as that buffer is.  The numeric fields are unsigned because the
+ * Netlink attributes they are read from are unsigned (u32 and u16). */
+struct offload_psample {
+    struct nlattr *packet;  /* PSAMPLE_ATTR_DATA: the sampled packet. */
+    uint32_t dp_group_id;   /* PSAMPLE_ATTR_SAMPLE_GROUP: mapping id for
+                             * sFlow offload. */
+    uint32_t iifindex;      /* PSAMPLE_ATTR_IIFINDEX: input ifindex. */
+};
+
+/* Opens the psample Netlink socket and subscribes it to the psample sample
+ * multicast group.
+ *
+ * Does nothing if the flow API is disabled or the socket is already open.
+ * Every failure is logged and otherwise ignored, leaving 'psample_sock' NULL,
+ * so that OVS keeps working on kernels without the psample module. */
+static void
+psample_init(void)
+{
+    unsigned int psample_mcgroup;
+    int err;
+
+    if (!netdev_is_flow_api_enabled()) {
+        VLOG_DBG("Flow API is not enabled.");
+        return;
+    }
+
+    if (psample_sock) {
+        VLOG_DBG("Psample socket is already initialized.");
+        return;
+    }
+
+    err = nl_lookup_genl_family(PSAMPLE_GENL_NAME, &psample_family);
+    if (err) {
+        VLOG_WARN("Generic Netlink family '%s' does not exist: %s\n"
+                  "Please make sure the kernel module psample is loaded.",
+                  PSAMPLE_GENL_NAME, ovs_strerror(err));
+        return;
+    }
+
+    /* This step only resolves the group's number; the socket joins the group
+     * further down, so a failure here is a lookup failure, not a join
+     * failure. */
+    err = nl_lookup_genl_mcgroup(PSAMPLE_GENL_NAME,
+                                 PSAMPLE_NL_MCGRP_SAMPLE_NAME,
+                                 &psample_mcgroup);
+    if (err) {
+        VLOG_WARN("Failed to look up Netlink multicast group '%s': %s",
+                  PSAMPLE_NL_MCGRP_SAMPLE_NAME, ovs_strerror(err));
+        return;
+    }
+
+    err = nl_sock_create(NETLINK_GENERIC, &psample_sock);
+    if (err) {
+        VLOG_WARN("Failed to create psample socket: %s", ovs_strerror(err));
+        /* Do not depend on what nl_sock_create() stored on failure. */
+        psample_sock = NULL;
+        return;
+    }
+
+    err = nl_sock_join_mcgroup(psample_sock, psample_mcgroup);
+    if (err) {
+        VLOG_WARN("Failed to join psample mcgroup: %s", ovs_strerror(err));
+        nl_sock_destroy(psample_sock);
+        psample_sock = NULL;
+        return;
+    }
+}
+
+/* 'init' callback of the "system" offload class.
+ *
+ * psample_init() reports nothing back, so this always returns 0, whether or
+ * not the psample socket could be set up. */
+static int
+dpif_offload_netlink_init(void)
+{
+    psample_init();
+    return 0;
+}
+
+/* Destroys the psample socket, if one is open, and resets 'psample_sock' to
+ * NULL. */
+static void
+psample_destroy(void)
+{
+    if (psample_sock) {
+        nl_sock_destroy(psample_sock);
+        psample_sock = NULL;
+    }
+}
+
+/* 'destroy' callback of the "system" offload class: tears down the psample
+ * socket via psample_destroy(). */
+static void
+dpif_offload_netlink_destroy(void)
+{
+    psample_destroy();
+}
+
+/* 'sflow_recv_wait' callback of the "system" offload class: passes the
+ * psample socket to nl_sock_wait() with POLLIN.  Does nothing when no psample
+ * socket is open. */
+static void
+dpif_offload_netlink_sflow_recv_wait(void)
+{
+    if (!psample_sock) {
+        return;
+    }
+
+    nl_sock_wait(psample_sock, POLLIN);
+}
+
+/* Parses the psample Netlink message held in 'buf' into 'psample'.
+ *
+ * Returns 0 on success.  Returns EINVAL, without writing to 'psample', if
+ * 'buf' is too short for the Netlink and Generic Netlink headers, if its
+ * message type is not 'psample_family', or if its attributes do not satisfy
+ * the policy below.
+ *
+ * 'buf' itself is not modified; parsing works on a separate ofpbuf built over
+ * 'buf->data'. */
+static int
+psample_from_ofpbuf(struct offload_psample *psample,
+                    const struct ofpbuf *buf)
+{
+    static const struct nl_policy ovs_psample_policy[] = {
+        [PSAMPLE_ATTR_IIFINDEX] = { .type = NL_A_U16 },
+        [PSAMPLE_ATTR_SAMPLE_GROUP] = { .type = NL_A_U32 },
+        [PSAMPLE_ATTR_GROUP_SEQ] = { .type = NL_A_U32 },
+        [PSAMPLE_ATTR_DATA] = { .type = NL_A_UNSPEC },
+    };
+    struct nlattr *attrs[ARRAY_SIZE(ovs_psample_policy)];
+    struct ofpbuf msg = ofpbuf_const_initializer(buf->data, buf->size);
+    struct nlmsghdr *nlmsg = ofpbuf_try_pull(&msg, sizeof *nlmsg);
+    struct genlmsghdr *genl = ofpbuf_try_pull(&msg, sizeof *genl);
+
+    if (!nlmsg || !genl) {
+        return EINVAL;
+    }
+
+    if (nlmsg->nlmsg_type != psample_family) {
+        return EINVAL;
+    }
+
+    if (!nl_policy_parse(&msg, 0, ovs_psample_policy, attrs,
+                         ARRAY_SIZE(ovs_psample_policy))) {
+        return EINVAL;
+    }
+
+    psample->iifindex = nl_attr_get_u16(attrs[PSAMPLE_ATTR_IIFINDEX]);
+    psample->dp_group_id = nl_attr_get_u32(attrs[PSAMPLE_ATTR_SAMPLE_GROUP]);
+    psample->packet = attrs[PSAMPLE_ATTR_DATA];
+
+    return 0;
+}
+
+/* Fills in 'sflow' from the parsed psample message 'psample'.
+ *
+ * 'sflow->packet' is initialized in place, with dp_packet_use_stub(), over
+ * the memory of the 'psample->packet' attribute: its base is the attribute
+ * header and its data is the attribute payload.
+ *
+ * Returns 0 on success.  Returns ENOENT if dpif_offload_sflow_attr_find()
+ * finds nothing for the sample's group id; in that case 'sflow->packet' and
+ * 'sflow->attr' have been written but 'sflow->iifindex' has not. */
+static int
+psample_parse_packet(struct offload_psample *psample,
+                     struct dpif_offload_sflow *sflow)
+{
+    size_t payload_len = nl_attr_get_size(psample->packet);
+    struct nlattr *attr_hdr;
+
+    /* Step back one 'struct nlattr' from the payload to reach the header. */
+    attr_hdr = CONST_CAST(struct nlattr *, nl_attr_get(psample->packet)) - 1;
+
+    dp_packet_use_stub(&sflow->packet, attr_hdr,
+                       payload_len + sizeof(struct nlattr));
+    dp_packet_set_data(&sflow->packet,
+                       (char *) dp_packet_data(&sflow->packet)
+                       + sizeof(struct nlattr));
+    dp_packet_set_size(&sflow->packet, payload_len);
+
+    sflow->attr = dpif_offload_sflow_attr_find(psample->dp_group_id);
+    if (sflow->attr) {
+        sflow->iifindex = psample->iifindex;
+        return 0;
+    }
+
+    return ENOENT;
+}
+
+/* 'sflow_recv' callback of the "system" offload class: reads one psample
+ * message from the psample socket, without blocking, and converts it into
+ * 'sflow'.
+ *
+ * On success 'sflow->packet' refers to bytes stored in 'sflow->buf_stub'; no
+ * heap memory is handed to the caller.  A message that does not fit in
+ * 'sflow->buf_stub' is dropped with a rate-limited warning, and the next
+ * pending message is tried instead.
+ *
+ * Returns 0 if 'sflow' was filled in, otherwise a positive errno value:
+ *
+ *   - ENOENT: there is no psample socket, or psample_parse_packet() found no
+ *     sFlow attribute for the sample's group id.
+ *
+ *   - EAGAIN: passed through from nl_sock_recv().
+ *
+ *   - EINVAL: the message could not be parsed as a psample message.
+ *
+ *   - ENOBUFS: nl_sock_recv() failed with any error other than EAGAIN; the
+ *     socket has been drained. */
+static int
+dpif_offload_netlink_sflow_recv(struct dpif_offload_sflow *sflow)
+{
+    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
+
+    if (!psample_sock) {
+        return ENOENT;
+    }
+
+    for (;;) {
+        struct offload_psample psample;
+        struct ofpbuf buf;
+        int error;
+
+        ofpbuf_use_stub(&buf, sflow->buf_stub, sizeof sflow->buf_stub);
+        error = nl_sock_recv(psample_sock, &buf, NULL, false);
+        if (error) {
+            if (error != EAGAIN) {
+                VLOG_WARN_RL(&rl, "Error reading or parsing netlink (%s).",
+                             ovs_strerror(error));
+                nl_sock_drain(psample_sock);
+                error = ENOBUFS;
+            }
+            ofpbuf_uninit(&buf);
+            return error;
+        }
+
+        if (buf.base != sflow->buf_stub) {
+            /* The message outgrew the stub, so 'buf' no longer uses it.
+             * 'sflow->packet' would have to point into memory that must be
+             * released before returning, so the sample cannot be handed
+             * out. */
+            VLOG_WARN_RL(&rl, "Dropping psample message that does not fit "
+                         "in the %u-byte receive buffer.",
+                         (unsigned int) sizeof sflow->buf_stub);
+            ofpbuf_uninit(&buf);
+            continue;
+        }
+
+        /* Parse everything before letting go of 'buf': both 'psample' and
+         * 'sflow->packet' point into its data. */
+        error = psample_from_ofpbuf(&psample, &buf);
+        if (!error) {
+            error = psample_parse_packet(&psample, sflow);
+        }
+
+        /* 'buf' still sits on 'sflow->buf_stub' here, so the bytes that
+         * 'sflow->packet' refers to stay in 'sflow'. */
+        ofpbuf_uninit(&buf);
+        return error;
+    }
+}
+
+const struct dpif_offload_class dpif_offload_netlink_class = { /* psample. */
+ .type = "system",
+ .init = dpif_offload_netlink_init, /* Calls psample_init(); returns 0. */
+ .destroy = dpif_offload_netlink_destroy, /* Calls psample_destroy(). */
+ .sflow_recv_wait = dpif_offload_netlink_sflow_recv_wait,
+ .sflow_recv = dpif_offload_netlink_sflow_recv,
+};
diff --git a/lib/dpif-offload-provider.h b/lib/dpif-offload-provider.h
index af49eedb9..ac13601b5 100644
--- a/lib/dpif-offload-provider.h
+++ b/lib/dpif-offload-provider.h
@@ -17,12 +17,18 @@
#ifndef DPIF_OFFLOAD_PROVIDER_H
#define DPIF_OFFLOAD_PROVIDER_H
+#include "dp-packet.h"
#include "netlink-protocol.h"
#include "openvswitch/packets.h"
#include "openvswitch/types.h"
struct dpif;
-struct dpif_offload_sflow;
+struct registered_dpif_offload_class; /* Defined in lib/dpif-offload.c. */
+
+#ifdef __linux__ /* The Netlink class is declared and registered on Linux. */
+extern const struct dpif_offload_class dpif_offload_netlink_class;
+#endif
+extern const struct dpif_offload_class dpif_offload_netdev_class;
/* When offloading sample action, userspace creates a unique ID to map
* sFlow action and tunnel info and passes this ID to datapath instead
@@ -37,6 +43,14 @@ struct dpif_sflow_attr {
ovs_u128 ufid; /* Flow ufid. */
};
+/* One sample received from a datapath offload provider, converted from the
+ * provider's own message format into a form that OVS can process. */
+struct dpif_offload_sflow {
+ struct dp_packet packet; /* Packet data. */
+ uint64_t buf_stub[4096 / 8]; /* Buffer stub for packet data (4096 bytes). */
+ uint32_t iifindex; /* Input ifindex. */
+ const struct dpif_sflow_attr *attr; /* SFlow attribute. */
+};
+
/* Datapath interface offload structure, to be defined by each implementation
* of a datapath interface.
*/
@@ -62,6 +76,15 @@ struct dpif_offload_class {
int (*sflow_recv)(struct dpif_offload_sflow *sflow);
};
+void dp_offload_initialize(void); /* Registers the built-in classes, once. */
+void dpif_offload_close(struct dpif *); /* Unrefs the dpif's offload class. */
+
+int dp_offload_register_provider(const struct dpif_offload_class *);
+int dp_offload_unregister_provider(const char *type);
+void dpif_offload_dummy_register(const char *type);
+void dp_offload_class_unref(struct registered_dpif_offload_class *rc);
+struct registered_dpif_offload_class *dp_offload_class_lookup(const char *);
+
void dpif_offload_sflow_recv_wait(const struct dpif *dpif);
int dpif_offload_sflow_recv(const struct dpif *dpif,
struct dpif_offload_sflow *sflow);
diff --git a/lib/dpif-offload.c b/lib/dpif-offload.c
index f2bf3e634..f3ac539ab 100644
--- a/lib/dpif-offload.c
+++ b/lib/dpif-offload.c
@@ -18,6 +18,163 @@
#include <errno.h>
#include "dpif-provider.h"
+#include "openvswitch/shash.h"
+#include "openvswitch/vlog.h"
+
+VLOG_DEFINE_THIS_MODULE(dpif_offload);
+
+/* Offload classes built into OVS.  dp_offload_initialize() registers them in
+ * the order listed here. */
+static const struct dpif_offload_class *base_dpif_offload_classes[] = {
+#ifdef __linux__
+    &dpif_offload_netlink_class,
+#endif
+    &dpif_offload_netdev_class,
+};
+
+struct registered_dpif_offload_class { /* Entry in 'dpif_offload_classes'. */
+ const struct dpif_offload_class *offload_class; /* The registered class. */
+ int refcount; /* Starts at 0; must be 0 for the class to be unregistered. */
+};
+static struct shash dpif_offload_classes =
+ SHASH_INITIALIZER(&dpif_offload_classes); /* Registered classes, keyed by
+                                            * type name; each value is a
+                                            * registered_dpif_offload_class. */
+
+/* Protects 'dpif_offload_classes', including the refcount of each entry. */
+static struct ovs_mutex dpif_offload_mutex = OVS_MUTEX_INITIALIZER;
+
+/* Registers every class listed in 'base_dpif_offload_classes'.  The work is
+ * guarded by an ovsthread_once, so repeated calls are cheap.  Registration
+ * errors are not reported to the caller. */
+void
+dp_offload_initialize(void)
+{
+    static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
+    size_t n_classes = ARRAY_SIZE(base_dpif_offload_classes);
+
+    if (!ovsthread_once_start(&once)) {
+        return;
+    }
+
+    for (size_t idx = 0; idx < n_classes; idx++) {
+        dp_offload_register_provider(base_dpif_offload_classes[idx]);
+    }
+    ovsthread_once_done(&once);
+}
+
+/* Adds 'new_class' to 'dpif_offload_classes' with a reference count of zero,
+ * after running its 'init' callback, if it has one.
+ *
+ * Returns 0 on success, EEXIST if a class of the same type is already
+ * registered, otherwise the error returned by the 'init' callback. */
+static int
+dp_offload_register_provider__(const struct dpif_offload_class *new_class)
+    OVS_REQUIRES(dpif_offload_mutex)
+{
+    struct registered_dpif_offload_class *rc;
+
+    if (shash_find(&dpif_offload_classes, new_class->type)) {
+        VLOG_WARN("Attempted to register duplicate datapath offload "
+                  "provider: %s", new_class->type);
+        return EEXIST;
+    }
+
+    if (new_class->init) {
+        int error = new_class->init();
+
+        if (error) {
+            VLOG_WARN("Failed to initialize %s datapath offload class: %s",
+                      new_class->type, ovs_strerror(error));
+            return error;
+        }
+    }
+
+    rc = xmalloc(sizeof *rc);
+    rc->offload_class = new_class;
+    rc->refcount = 0;
+    shash_add(&dpif_offload_classes, new_class->type, rc);
+
+    return 0;
+}
+
+/* If 'dpif' has an offload class, looks up that class's entry in
+ * 'dpif_offload_classes' by type and passes it to dp_offload_class_unref().
+ * Does nothing otherwise. */
+void
+dpif_offload_close(struct dpif *dpif)
+{
+    struct registered_dpif_offload_class *rc;
+
+    if (!dpif->offload_class) {
+        return;
+    }
+
+    rc = shash_find_data(&dpif_offload_classes, dpif->offload_class->type);
+    dp_offload_class_unref(rc);
+}
+
+/* Registers 'new_class' as an offload datapath provider.
+ *
+ * Holds 'dpif_offload_mutex' around dp_offload_register_provider__() and
+ * returns whatever that function returns. */
+int
+dp_offload_register_provider(const struct dpif_offload_class *new_class)
+{
+    int retval;
+
+    ovs_mutex_lock(&dpif_offload_mutex);
+    retval = dp_offload_register_provider__(new_class);
+    ovs_mutex_unlock(&dpif_offload_mutex);
+
+    return retval;
+}
+
+/* Removes the offload datapath provider named 'type' from
+ * 'dpif_offload_classes' and frees its entry, running the class's 'destroy'
+ * callback first, if it has one.  'type' must have been registered earlier
+ * and must not be in use by any dpif.
+ *
+ * Returns 0 on success, EAFNOSUPPORT if no provider of that type is
+ * registered, or EBUSY if the provider's reference count is nonzero. */
+static int
+dp_offload_unregister_provider__(const char *type)
+    OVS_REQUIRES(dpif_offload_mutex)
+{
+    struct shash_node *node = shash_find(&dpif_offload_classes, type);
+    struct registered_dpif_offload_class *rc;
+
+    if (!node) {
+        return EAFNOSUPPORT;
+    }
+
+    rc = node->data;
+    if (rc->refcount) {
+        VLOG_WARN("Attempted to unregister in use offload datapath provider: "
+                  "%s", type);
+        return EBUSY;
+    }
+
+    if (rc->offload_class->destroy) {
+        rc->offload_class->destroy();
+    }
+    shash_delete(&dpif_offload_classes, node);
+    free(rc);
+
+    return 0;
+}
+
+/* Unregisters an offload datapath provider. 'type' must have been previously
+ * registered and not currently be in use by any dpifs. After unregistration
+ * new offload datapaths of that type cannot be opened using dpif_open(). */
+int
+dp_offload_unregister_provider(const char *type)
+{
+ int error;
+
+ dp_offload_initialize();