Signed-off-by: William Tu <u9012...@gmail.com>
---
v5:
- rebase to master
Feedbacks from Eelco:
- Remove xdp-obj="__default__" case, to remove xdp-obj, use
ovs-vsctl remove int <dev> options xdp-obj
- Fix problem of xdp program not unloading
verify by bpftool.
- use xdp-obj instead of xdpobj
- Limitation: xdp-obj doesn't work when using best-effort-mode
because best-effort mode tried to probe mode by setting up
queue,
and loading xdp-obj requires knwoing mode in advance.
(to support it, we might need to use the
XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD as in v3)
Testing
- I place two xdp binary here
https://drive.google.com/open?id=1QCCdNE-5CwlKCFV6Upg9mOPnnbVkUwA5
[xdpsock_pass.o] Working one, which forwards packets to
dpif-netdev
[xdpsock_invalid.o] invalid one, which has no map
v4:
Feedbacks from Eelco.
- First load the program, then configure xsk.
Let API take care of xdp prog and map loading, don't set
XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD.
- When loading custom xdp, need to close(prog_fd) and
close(map_fd)
to release the resources
- make sure prog and map is unloaded by bpftool.
- update doc, afxdp.rst
- Tested-at:
https://travis-ci.org/williamtu/ovs-travis/builds/608986781
v3:
Feedbacks from Eelco.
- keep using xdpobj not xdp-obj (because we alread use xdpmode)
or we change both to xdp-obj and xdp-mode?
- log a info message when using external program for better
debugging
- combine some failure messages
- update doc
NEW:
- add options:xdpobj=__default__, to set back to libbpf default
prog
- Tested-at:
https://travis-ci.org/williamtu/ovs-travis/builds/606153231
v2:
A couple fixes and remove RFC
---
Documentation/intro/install/afxdp.rst | 59 +++++++++++++++
NEWS | 2 +
lib/netdev-afxdp.c | 139
++++++++++++++++++++++++++++++++--
lib/netdev-linux-private.h | 4 +
4 files changed, 197 insertions(+), 7 deletions(-)
diff --git a/Documentation/intro/install/afxdp.rst
b/Documentation/intro/install/afxdp.rst
index 15e3c918f942..e72bb3edabe6 100644
--- a/Documentation/intro/install/afxdp.rst
+++ b/Documentation/intro/install/afxdp.rst
@@ -283,6 +283,65 @@ Or, use OVS pmd tool::
ovs-appctl dpif-netdev/pmd-stats-show
+Loading Custom XDP Program
+--------------------------
+By defailt, netdev-afxdp always forwards all packets to userspace
because
+it is using libbpf's default XDP program. There are some cases when
users
+want to keep packets in kernel instead of sending to userspace, for
example,
+management traffic such as SSH should be processed in kernel. This
can be
+done by loading the user-provided XDP program::
+
+ ovs-vsctl -- set int afxdp-p0 options:xdp-obj=<path/to/xdp/obj>
+
+So users can implement their filtering logic or traffic steering idea
+in their XDP program, and rest of the traffic passes to AF_XDP socket
+handled by OVS. To set it back to default, use::
+
+ ovs-vsctl remove int afxdp-p0 options xdp-obj
+
+Below is a sample C program compiled under kernel's samples/bpf/.
+
+.. code-block:: c
+
+ #include <uapi/linux/bpf.h>
+ #include "bpf_helpers.h"
+
+ #if LINUX_VERSION_CODE < KERNEL_VERSION(5,3,0)
+ /* Kernel version before 5.3 needed an additional map */
+ struct bpf_map_def SEC("maps") qidconf_map = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 64,
+ };
+ #endif
+
+ /* OVS will associate map 'xsks_map' to xsk socket. */
+ struct bpf_map_def SEC("maps") xsks_map = {
+ .type = BPF_MAP_TYPE_XSKMAP,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 32,
+ };
+
+ SEC("xdp_sock")
+ int xdp_sock_prog(struct xdp_md *ctx)
+ {
+ int index = ctx->rx_queue_index;
+
+ /* Customized by user.
+ * For example
+ * 1) filter out all SSH traffic and return XDP_PASS
+ * for kernel to process.
+ * 2) Drop unwanted packet by returning XDP_DROP.
+ */
+
+ /* Rest of packets goes to AF_XDP. */
+ return bpf_redirect_map(&xsks_map, index, 0);
+ }
+ char _license[] SEC("license") = "GPL";
+
+
Example Script
--------------
diff --git a/NEWS b/NEWS
index 100d7b6a8c29..d5b3063fb606 100644
--- a/NEWS
+++ b/NEWS
@@ -18,6 +18,8 @@ Post-v2.12.0
generic - former SKB
best-effort [default] - new one, chooses the best available
from
3 above modes
+ * New option 'xdp-obj' for loading custom XDP program. Default
uses
+ the libbpf builtin XDP program.
- DPDK:
* DPDK pdump packet capture support disabled by default. New
configure
option '--enable-dpdk-pdump' to enable it.
diff --git a/lib/netdev-afxdp.c b/lib/netdev-afxdp.c
index ca2dfd005b9f..5d79a629f69b 100644
--- a/lib/netdev-afxdp.c
+++ b/lib/netdev-afxdp.c
@@ -21,6 +21,7 @@
#include "netdev-afxdp.h"
#include "netdev-afxdp-pool.h"
+#include <bpf/bpf.h>
#include <errno.h>
#include <inttypes.h>
#include <linux/rtnetlink.h>
@@ -30,6 +31,7 @@
#include <stdlib.h>
#include <sys/resource.h>
#include <sys/socket.h>
+#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
@@ -92,7 +94,8 @@ static struct xsk_socket_info *xsk_configure(int
ifindex, int xdp_queue_id,
enum afxdp_mode mode,
bool use_need_wakeup,
bool
report_socket_failures);
-static void xsk_remove_xdp_program(uint32_t ifindex, enum
afxdp_mode);
+static void xsk_remove_xdp_program(uint32_t ifindex, enum afxdp_mode,
+ int prog_fd, int map_fd);
static void xsk_destroy(struct xsk_socket_info *xsk);
static int xsk_configure_all(struct netdev *netdev);
static void xsk_destroy_all(struct netdev *netdev);
@@ -247,6 +250,23 @@ netdev_afxdp_sweep_unused_pools(void *aux
OVS_UNUSED)
ovs_mutex_unlock(&unused_pools_mutex);
}
+static int
+xsk_load_prog(const char *path, struct bpf_object **obj,
+ int *prog_fd)
+{
+ struct bpf_prog_load_attr attr = {
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .file = path,
+ };
+
+ if (bpf_prog_load_xattr(&attr, obj, prog_fd)) {
+ VLOG_ERR("Can't load XDP program at '%s'", path);
+ return EINVAL;
+ }
+
+ return 0;
+}
+
static struct xsk_umem_info *
xsk_configure_umem(void *buffer, uint64_t size)
{
@@ -463,6 +483,50 @@ xsk_configure_queue(struct netdev_linux *dev, int
ifindex, int queue_id,
return 0;
}
+static int
+xsk_configure_prog(struct netdev *netdev, int ifindex)
+{
+ struct netdev_linux *dev = netdev_linux_cast(netdev);
+ struct bpf_object *obj;
+ uint32_t prog_id = 0;
+ uint32_t flags;
+ int prog_fd = 0;
+ int map_fd = 0;
+ int mode;
+ int ret;
+
+ mode = dev->xdp_mode_in_use;
+ flags = xdp_modes[mode].xdp_flags | XDP_FLAGS_UPDATE_IF_NOEXIST;
+
+ ret = xsk_load_prog(dev->xdp_obj, &obj, &prog_fd);
+ if (ret) {
+ goto err;
+ }
+ dev->prog_fd = prog_fd;
+
+ bpf_set_link_xdp_fd(ifindex, prog_fd, flags);
+ ret = bpf_get_link_xdp_id(ifindex, &prog_id, flags);
+ if (ret < 0) {
+ VLOG_ERR("%s: Cannot get XDP prog id.",
+ netdev_get_name(netdev));
+ goto err;
+ }
+
+ map_fd = bpf_object__find_map_fd_by_name(obj, "xsks_map");
+ if (map_fd < 0) {
+ VLOG_ERR("%s: Cannot find \"xsks_map\".",
+ netdev_get_name(netdev));
+ goto err;
+ }
+ dev->map_fd = map_fd;
+
+ VLOG_INFO("%s: Loaded custom XDP program at %s prog_id %d.",
+ netdev_get_name(netdev), dev->xdp_obj, prog_id);
+ return 0;
+
+err:
+ return ret;
+}
static int
xsk_configure_all(struct netdev *netdev)
@@ -499,6 +563,13 @@ xsk_configure_all(struct netdev *netdev)
qid++;
} else {
dev->xdp_mode_in_use = dev->xdp_mode;
+ if (dev->xdp_obj) {
+ /* XDP program is per-netdev, so all queues share
+ * the same XDP program. */
+ if (xsk_configure_prog(netdev, ifindex)) {
+ goto err;
+ }
+ }
}
/* Configure remaining queues. */
@@ -573,7 +644,12 @@ xsk_destroy_all(struct netdev *netdev)
VLOG_INFO("%s: Removing xdp program.", netdev_get_name(netdev));
ifindex = linux_get_ifindex(netdev_get_name(netdev));
- xsk_remove_xdp_program(ifindex, dev->xdp_mode_in_use);
+ xsk_remove_xdp_program(ifindex, dev->xdp_mode_in_use,
dev->prog_fd,
+ dev->map_fd);
+ dev->prog_fd = 0;
+ dev->map_fd = 0;
+ free(CONST_CAST(char *, dev->xdp_obj));
+ dev->xdp_obj = NULL;
if (dev->tx_locks) {
for (i = 0; i < netdev_n_txq(netdev); i++) {
@@ -590,9 +666,11 @@ netdev_afxdp_set_config(struct netdev *netdev,
const struct smap *args,
{
struct netdev_linux *dev = netdev_linux_cast(netdev);
const char *str_xdp_mode;
+ const char *str_xdp_obj;
enum afxdp_mode xdp_mode;
bool need_wakeup;
int new_n_rxq;
+ struct stat s;
ovs_mutex_lock(&dev->mutex);
new_n_rxq = MAX(smap_get_int(args, "n_rxq", NR_QUEUE), 1);
@@ -626,12 +704,34 @@ netdev_afxdp_set_config(struct netdev *netdev,
const struct smap *args,
}
#endif
+ str_xdp_obj = smap_get_def(args, "xdp-obj", NULL);
+ if (str_xdp_obj) {
+ if (stat(str_xdp_obj, &s)) {
+ ovs_mutex_unlock(&dev->mutex);
+ VLOG_ERR("Invalid xdp-obj '%s': %s.", str_xdp_obj,
+ ovs_strerror(errno));
+ return EINVAL;
+ } else if (!S_ISREG(s.st_mode)) {
+ ovs_mutex_unlock(&dev->mutex);
+ VLOG_ERR("xdp-obj '%s' is not a regular file.",
str_xdp_obj);
+ return EINVAL;
+ }
+ }
+
+ if (str_xdp_obj && xdp_mode == OVS_AF_XDP_MODE_BEST_EFFORT) {
+ ovs_mutex_unlock(&dev->mutex);
+ VLOG_ERR("best-effort mode and xdp-obj can't be set
together");
+ return EINVAL;
+ }
+
if (dev->requested_n_rxq != new_n_rxq
|| dev->requested_xdp_mode != xdp_mode
- || dev->requested_need_wakeup != need_wakeup) {
+ || dev->requested_need_wakeup != need_wakeup
+ || !nullable_string_is_equal(dev->requested_xdp_obj,
str_xdp_obj)) {
dev->requested_n_rxq = new_n_rxq;
dev->requested_xdp_mode = xdp_mode;
dev->requested_need_wakeup = need_wakeup;
+ dev->requested_xdp_obj = nullable_xstrdup(str_xdp_obj);
netdev_request_reconfigure(netdev);
}
ovs_mutex_unlock(&dev->mutex);
@@ -650,6 +750,8 @@ netdev_afxdp_get_config(const struct netdev
*netdev, struct smap *args)
xdp_modes[dev->xdp_mode_in_use].name);
smap_add_format(args, "use-need-wakeup", "%s",
dev->use_need_wakeup ? "true" : "false");
+ smap_add_format(args, "xdp-obj", "%s",
+ dev->xdp_obj ? dev->xdp_obj : "builtin");
ovs_mutex_unlock(&dev->mutex);
return 0;
}
@@ -666,7 +768,8 @@ netdev_afxdp_reconfigure(struct netdev *netdev)
if (netdev->n_rxq == dev->requested_n_rxq
&& dev->xdp_mode == dev->requested_xdp_mode
&& dev->use_need_wakeup == dev->requested_need_wakeup
- && dev->xsks) {
+ && dev->xsks
+ && nullable_string_is_equal(dev->xdp_obj,
dev->requested_xdp_obj)) {
goto out;
}
@@ -684,6 +787,8 @@ netdev_afxdp_reconfigure(struct netdev *netdev)
}
dev->use_need_wakeup = dev->requested_need_wakeup;
+ dev->xdp_obj = dev->requested_xdp_obj;
+
err = xsk_configure_all(netdev);
if (err) {
VLOG_ERR("%s: AF_XDP device reconfiguration failed.",
@@ -707,11 +812,28 @@ netdev_afxdp_get_numa_id(const struct netdev
*netdev)
}
static void
-xsk_remove_xdp_program(uint32_t ifindex, enum afxdp_mode mode)
+xsk_remove_xdp_program(uint32_t ifindex, enum afxdp_mode mode,
+ int prog_fd, int map_fd)
{
uint32_t flags = xdp_modes[mode].xdp_flags |
XDP_FLAGS_UPDATE_IF_NOEXIST;
+ uint32_t prog_id;
+ int ret;
+
+ bpf_get_link_xdp_id(ifindex, &prog_id, flags);
+
+ ret = bpf_set_link_xdp_fd(ifindex, -1, flags);
+ if (ret) {
+ VLOG_ERR("Failed to unload prog ID: %d", prog_id);
+ }
+
+ if (prog_fd) {
+ close(prog_fd);
+ }
+ if (map_fd) {
+ close(map_fd);
+ }
- bpf_set_link_xdp_fd(ifindex, -1, flags);
+ VLOG_INFO("Removed XDP program ID: %d", prog_id);
}
void
@@ -723,7 +845,8 @@ signal_remove_xdp(struct netdev *netdev)
ifindex = linux_get_ifindex(netdev_get_name(netdev));
VLOG_WARN("Force removing xdp program.");
- xsk_remove_xdp_program(ifindex, dev->xdp_mode_in_use);
+ xsk_remove_xdp_program(ifindex, dev->xdp_mode_in_use,
+ dev->prog_fd, dev->map_fd);
}
static struct dp_packet_afxdp *
@@ -1136,10 +1259,12 @@ netdev_afxdp_construct(struct netdev *netdev)
netdev->n_txq = 0;
dev->xdp_mode = OVS_AF_XDP_MODE_UNSPEC;
dev->xdp_mode_in_use = OVS_AF_XDP_MODE_UNSPEC;
+ dev->xdp_obj = NULL;
dev->requested_n_rxq = NR_QUEUE;
dev->requested_xdp_mode = OVS_AF_XDP_MODE_BEST_EFFORT;
dev->requested_need_wakeup = NEED_WAKEUP_DEFAULT;
+ dev->requested_xdp_obj = NULL;
dev->xsks = NULL;
dev->tx_locks = NULL;
diff --git a/lib/netdev-linux-private.h b/lib/netdev-linux-private.h
index 8873caa9d412..fe706c2d82fe 100644
--- a/lib/netdev-linux-private.h
+++ b/lib/netdev-linux-private.h
@@ -109,6 +109,10 @@ struct netdev_linux {
bool requested_need_wakeup;
struct ovs_spin *tx_locks; /* spin lock array for TX queues. */
+ const char *xdp_obj; /* XDP object file path. */
+ const char *requested_xdp_obj;
+ int prog_fd;
+ int map_fd;
#endif
};
--
2.7.4