On 26.09.2019 21:29, William Tu wrote:
The patch adds support for using need_wakeup flag in AF_XDP rings.
A new option, use_need_wakeup, is added. When this option is used,
it means that OVS has to explicitly wake up the kernel RX, using poll()
syscall and wake up TX, using sendto() syscall. This feature improves
the performance by avoiding unnecessary sendto syscalls for TX.
For RX, instead of kernel always busy-spinning on fille queue, OVS wakes
up the kernel RX processing when fill queue is replenished.

The need_wakeup feature is merged into Linux kernel bpf-next tee with commit
77cd0d7b3f25 ("xsk: add support for need_wakeup flag in AF_XDP rings") and
OVS enables it by default. Running the feature before this version causes
xsk bind fails, please use options:use_need_wakeup=false to disable it.
If users enable it but runs in an older version of libbpf, then the
need_wakeup feature has no effect, and a warning message is logged.

For virtual interface, it's better set use_need_wakeup=false, since
the virtual device's AF_XDP xmit is synchronous: the sendto syscall
enters kernel and process the TX packet on tx queue directly.

On Intel Xeon E5-2620 v3 2.4GHz system, performance of physical port
to physical port improves from 6.1Mpps to 7.3Mpps.

Suggested-by: Ilya Maximets <[email protected]>
Signed-off-by: William Tu <[email protected]>
---
v5:
- address feedback from Ilya
   - update commit msg about kernel version using bpf-next tree
   - remove __func__, and use DBG in log_
   - fix alignment
   - remove the kernel version requirement for need_wakeup, we can
     wait until tag is available

v4:
- move use_need_wakeup check inside xsk_rx_wakeup_if_needed

v3:
- add warning when user enables it but libbpf not support it
- revise documentation

v2:
- address feedbacks from Ilya and Eelco
- add options:use_need_wakeup, default to true
- remove poll timeout=1sec, make poll() return immediately
- naming change: rename to xsk_rx_wakeup_if_needing
- fix indents and return value for errno
---
  Documentation/intro/install/afxdp.rst |  15 ++++-
  acinclude.m4                          |   5 ++
  lib/netdev-afxdp.c                    | 108 ++++++++++++++++++++++++++++++----
  lib/netdev-linux-private.h            |   2 +
  vswitchd/vswitch.xml                  |  12 ++++
  5 files changed, 126 insertions(+), 16 deletions(-)

diff --git a/Documentation/intro/install/afxdp.rst 
b/Documentation/intro/install/afxdp.rst
index 820e9d993d8f..545516b2bbec 100644
--- a/Documentation/intro/install/afxdp.rst
+++ b/Documentation/intro/install/afxdp.rst
@@ -176,9 +176,18 @@ in :doc:`general`::
    ovs-vswitchd ...
    ovs-vsctl -- add-br br0 -- set Bridge br0 datapath_type=netdev
-Make sure your device driver support AF_XDP, and to use 1 PMD (on core 4)
-on 1 queue (queue 0) device, configure these options: **pmd-cpu-mask,
-pmd-rxq-affinity, and n_rxq**. The **xdpmode** can be "drv" or "skb"::
+Make sure your device driver support AF_XDP, netdev-afxdp supports
+the following additional options (see man ovs-vswitchd.conf.db for
+more details):
+
+ * **xdpmode**: use "drv" for driver mode, or "skb" for skb mode.
+
+ * **use_need_wakeup**: disable by setting to "false", otherwise default
+   is "true"
+
+For example, to use 1 PMD (on core 4) on 1 queue (queue 0) device,
+configure these options: **pmd-cpu-mask, pmd-rxq-affinity, and n_rxq**.
+The **xdpmode** can be "drv" or "skb"::
ethtool -L enp2s0 combined 1
    ovs-vsctl set Open_vSwitch . other_config:pmd-cpu-mask=0x10
diff --git a/acinclude.m4 b/acinclude.m4
index f0e38898b17a..f1ecb86adf7c 100644
--- a/acinclude.m4
+++ b/acinclude.m4
@@ -276,6 +276,11 @@ AC_DEFUN([OVS_CHECK_LINUX_AF_XDP], [
                [Define to 1 if AF_XDP support is available and enabled.])
      LIBBPF_LDADD=" -lbpf -lelf"
      AC_SUBST([LIBBPF_LDADD])
+
+    AC_CHECK_DECL([xsk_ring_prod__needs_wakeup], [
+      AC_DEFINE([HAVE_XDP_NEED_WAKEUP], [1],
+        [XDP need wakeup support detected in xsk.h.])
+    ], [], [[#include <bpf/xsk.h>]])
    fi
    AM_CONDITIONAL([HAVE_AF_XDP], test "$AF_XDP_ENABLE" = true)
  ])
diff --git a/lib/netdev-afxdp.c b/lib/netdev-afxdp.c
index 6e01803272aa..fee9413bfd0a 100644
--- a/lib/netdev-afxdp.c
+++ b/lib/netdev-afxdp.c
@@ -26,6 +26,7 @@
  #include <linux/rtnetlink.h>
  #include <linux/if_xdp.h>
  #include <net/if.h>
+#include <poll.h>
  #include <stdlib.h>
  #include <sys/resource.h>
  #include <sys/socket.h>
@@ -82,7 +83,7 @@ BUILD_ASSERT_DECL(PROD_NUM_DESCS == CONS_NUM_DESCS);
  #define UMEM2DESC(elem, base) ((uint64_t)((char *)elem - (char *)base))
static struct xsk_socket_info *xsk_configure(int ifindex, int xdp_queue_id,
-                                             int mode);
+                                             int mode, bool use_need_wakeup);
  static void xsk_remove_xdp_program(uint32_t ifindex, int xdpmode);
  static void xsk_destroy(struct xsk_socket_info *xsk);
  static int xsk_configure_all(struct netdev *netdev);
@@ -117,6 +118,54 @@ struct xsk_socket_info {
      atomic_uint64_t tx_dropped;
  };
+#ifdef HAVE_XDP_NEED_WAKEUP
+static inline void
+xsk_rx_wakeup_if_needed(struct xsk_umem_info *umem,
+                        struct netdev *netdev, int fd)
+{
+    struct netdev_linux *dev = netdev_linux_cast(netdev);
+    struct pollfd pfd;
+    int ret;
+
+    if (!dev->use_need_wakeup) {
+        return;
+    }
+
+    if (xsk_ring_prod__needs_wakeup(&umem->fq)) {
+        pfd.fd = fd;
+        pfd.events = POLLIN;
+
+        ret = poll(&pfd, 1, 0);
+        if (OVS_UNLIKELY(ret < 0)) {
+            VLOG_WARN_RL(&rl, "%s: error polling rx fd: %s.",
+                         netdev_get_name(netdev),
+                         ovs_strerror(errno));
+        }
+    }
+}
+
+static inline bool
+xsk_tx_need_wakeup(struct xsk_socket_info *xsk_info)
+{
+    return xsk_ring_prod__needs_wakeup(&xsk_info->tx);
+}
+
+#else /* !HAVE_XDP_NEED_WAKEUP */
+static inline void
+xsk_rx_wakeup_if_needed(struct xsk_umem_info *umem OVS_UNUSED,
+                        struct netdev *netdev OVS_UNUSED,
+                        int fd OVS_UNUSED)
+{
+    /* Nothing. */
+}
+
+static inline bool
+xsk_tx_need_wakeup(struct xsk_socket_info *xsk_info OVS_UNUSED)
+{
+    return true;
+}
+#endif /* HAVE_XDP_NEED_WAKEUP */
+
  static void
  netdev_afxdp_cleanup_unused_pool(struct unused_pool *pool)
  {
@@ -234,7 +283,7 @@ xsk_configure_umem(void *buffer, uint64_t size, int xdpmode)
static struct xsk_socket_info *
  xsk_configure_socket(struct xsk_umem_info *umem, uint32_t ifindex,
-                     uint32_t queue_id, int xdpmode)
+                     uint32_t queue_id, int xdpmode, bool use_need_wakeup)
  {
      struct xsk_socket_config cfg;
      struct xsk_socket_info *xsk;
@@ -257,6 +306,12 @@ xsk_configure_socket(struct xsk_umem_info *umem, uint32_t 
ifindex,
          cfg.xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST | XDP_FLAGS_SKB_MODE;
      }
+ if (use_need_wakeup) {
+#ifdef HAVE_XDP_NEED_WAKEUP
+        cfg.bind_flags |= XDP_USE_NEED_WAKEUP;
+#endif
+    }
+
      if (if_indextoname(ifindex, devname) == NULL) {
          VLOG_ERR("ifindex %d to devname failed (%s)",
                   ifindex, ovs_strerror(errno));
@@ -267,9 +322,11 @@ xsk_configure_socket(struct xsk_umem_info *umem, uint32_t 
ifindex,
      ret = xsk_socket__create(&xsk->xsk, devname, queue_id, umem->umem,
                               &xsk->rx, &xsk->tx, &cfg);
      if (ret) {
-        VLOG_ERR("xsk_socket__create failed (%s) mode: %s qid: %d",
+        VLOG_ERR("xsk_socket__create failed (%s) mode: %s "
+                 "use_need_wakeup: %s qid: %d",
                   ovs_strerror(errno),
                   xdpmode == XDP_COPY ? "SKB": "DRV",
+                 use_need_wakeup ? "true" : "false",
                   queue_id);
          free(xsk);
          return NULL;
@@ -311,7 +368,8 @@ xsk_configure_socket(struct xsk_umem_info *umem, uint32_t 
ifindex,
  }
static struct xsk_socket_info *
-xsk_configure(int ifindex, int xdp_queue_id, int xdpmode)
+xsk_configure(int ifindex, int xdp_queue_id, int xdpmode,
+              bool use_need_wakeup)
  {
      struct xsk_socket_info *xsk;
      struct xsk_umem_info *umem;
@@ -334,7 +392,8 @@ xsk_configure(int ifindex, int xdp_queue_id, int xdpmode)
VLOG_DBG("Allocated umem pool at 0x%"PRIxPTR, (uintptr_t) umem); - xsk = xsk_configure_socket(umem, ifindex, xdp_queue_id, xdpmode);
+    xsk = xsk_configure_socket(umem, ifindex, xdp_queue_id, xdpmode,
+                               use_need_wakeup);
      if (!xsk) {
          /* Clean up umem and xpacket pool. */
          if (xsk_umem__delete(umem->umem)) {
@@ -365,9 +424,12 @@ xsk_configure_all(struct netdev *netdev)
/* Configure each queue. */
      for (i = 0; i < n_rxq; i++) {
-        VLOG_INFO("%s: configure queue %d mode %s", __func__, i,
-                  dev->xdpmode == XDP_COPY ? "SKB" : "DRV");
-        xsk_info = xsk_configure(ifindex, i, dev->xdpmode);
+        VLOG_DBG("%s: configure queue %d mode %s use_need_wakeup %s.",
+                 netdev_get_name(netdev), i,
+                 dev->xdpmode == XDP_COPY ? "SKB" : "DRV",
+                 dev->use_need_wakeup ? "true" : "false");
+        xsk_info = xsk_configure(ifindex, i, dev->xdpmode,
+                                 dev->use_need_wakeup);
          if (!xsk_info) {
              VLOG_ERR("Failed to create AF_XDP socket on queue %d.", i);
              dev->xsks[i] = NULL;
@@ -459,6 +521,7 @@ netdev_afxdp_set_config(struct netdev *netdev, const struct 
smap *args,
      struct netdev_linux *dev = netdev_linux_cast(netdev);
      const char *str_xdpmode;
      int xdpmode, new_n_rxq;
+    bool need_wakeup;
ovs_mutex_lock(&dev->mutex);
      new_n_rxq = MAX(smap_get_int(args, "n_rxq", NR_QUEUE), 1);
@@ -481,10 +544,19 @@ netdev_afxdp_set_config(struct netdev *netdev, const 
struct smap *args,
          return EINVAL;
      }
+ need_wakeup = smap_get_bool(args, "use_need_wakeup", true);
+    if (need_wakeup) {
+#ifndef HAVE_XDP_NEED_WAKEUP
+        VLOG_WARN("XDP need_wakeup is not supported in libbpf.");

This warning breaks all the tests on system without need_wakeup support.
It seems better to enable this feature by default only if supported by libbpf.

One more thing that we need to actually drop need_wakeup to false here,
otherwise get_config() and some log messages will report wrong information.

Suggesting following incremental:

diff --git a/lib/netdev-afxdp.c b/lib/netdev-afxdp.c
index a516cc727..60259bdd3 100644
--- a/lib/netdev-afxdp.c
+++ b/lib/netdev-afxdp.c
@@ -68,6 +68,12 @@ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 
20);
 #define PROD_NUM_DESCS      XSK_RING_PROD__DEFAULT_NUM_DESCS
 #define CONS_NUM_DESCS      XSK_RING_CONS__DEFAULT_NUM_DESCS
+#ifdef HAVE_XDP_NEED_WAKEUP
+#define NEED_WAKEUP_DEFAULT true
+#else
+#define NEED_WAKEUP_DEFAULT false
+#endif
+
 /* The worst case is all 4 queues TX/CQ/RX/FILL are full + some packets
  * still on processing in threads. Number of packets currently in OVS
  * processing is hard to estimate because it depends on number of ports.
@@ -307,11 +313,11 @@ xsk_configure_socket(struct xsk_umem_info *umem, uint32_t 
ifindex,
         cfg.xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST | XDP_FLAGS_SKB_MODE;
     }
- if (use_need_wakeup) {
 #ifdef HAVE_XDP_NEED_WAKEUP
+    if (use_need_wakeup) {
         cfg.bind_flags |= XDP_USE_NEED_WAKEUP;
-#endif
     }
+#endif
if (if_indextoname(ifindex, devname) == NULL) {
         VLOG_ERR("ifindex %d to devname failed (%s)",
@@ -545,12 +551,13 @@ netdev_afxdp_set_config(struct netdev *netdev, const 
struct smap *args,
         return EINVAL;
     }
- need_wakeup = smap_get_bool(args, "use_need_wakeup", true);
-    if (need_wakeup) {
+    need_wakeup = smap_get_bool(args, "use_need_wakeup", NEED_WAKEUP_DEFAULT);
 #ifndef HAVE_XDP_NEED_WAKEUP
+    if (need_wakeup) {
         VLOG_WARN("XDP need_wakeup is not supported in libbpf.");
-#endif
+        need_wakeup = false;
     }
+#endif
if (dev->requested_n_rxq != new_n_rxq
         || dev->requested_xdpmode != xdpmode
@@ -1049,7 +1056,7 @@ netdev_afxdp_construct(struct netdev *netdev)
dev->requested_n_rxq = NR_QUEUE;
     dev->requested_xdpmode = XDP_COPY;
-    dev->requested_need_wakeup = true;
+    dev->requested_need_wakeup = NEED_WAKEUP_DEFAULT;
dev->xsks = NULL;
     dev->tx_locks = NULL;
diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index 24d061891..decccfe84 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -3130,7 +3130,7 @@ ovs-vsctl add-port br0 p0 -- set Interface p0 type=patch 
options:peer=p1 \
           syscall and wakes up TX, using sendto() syscall. For physical
           devices, this feature improves the performance by avoiding
           unnecessary sendto syscalls.
-          Defaults is true.
+          Defaults to true if supported by libbpf.
         </p>
       </column>
---

Some updates about default value also will be needed in commit message and,
probably, in afxdp docs.

Best regards, Ilya Maximets.
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to