commit:     330a960e8ef0c8c0f12c0fc4d668e36aa8e64600
Author:     Mike Pagano <mpagano <AT> gentoo <DOT> org>
AuthorDate: Mon Jul  3 16:59:11 2023 +0000
Commit:     Mike Pagano <mpagano <AT> gentoo <DOT> org>
CommitDate: Mon Jul  3 16:59:11 2023 +0000
URL:        https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=330a960e

wireguard: queueing: use saner cpu selection wrapping

Bug: https://bugs.gentoo.org/909066

Signed-off-by: Mike Pagano <mpagano <AT> gentoo.org>

 0000_README                                        |   4 +
 2400_wireguard-queueing-cpu-sel-wrapping-fix.patch | 116 +++++++++++++++++++++
 2 files changed, 120 insertions(+)

diff --git a/0000_README b/0000_README
index 22f83174..bda29555 100644
--- a/0000_README
+++ b/0000_README
@@ -63,6 +63,10 @@ Patch:  2000_BT-Check-key-sizes-only-if-Secure-Simple-Pairing-enabled.patch
 From:   https://lore.kernel.org/linux-bluetooth/20190522070540.48895-1-mar...@holtmann.org/raw
 Desc:   Bluetooth: Check key sizes only when Secure Simple Pairing is enabled. See bug #686758
 
+Patch:  2400_wireguard-queueing-cpu-sel-wrapping-fix.patch
+From:   https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git/commit/?id=7387943fa35516f6f8017a3b0e9ce48a3bef9faa
+Desc:   wireguard: queueing: use saner cpu selection wrapping
+
 Patch:  2900_tmp513-Fix-build-issue-by-selecting-CONFIG_REG.patch
 From:   https://bugs.gentoo.org/710790
 Desc:   tmp513 requies REGMAP_I2C to build.  Select it by default in Kconfig. See bug #710790. Thanks to Phil Stracchino

diff --git a/2400_wireguard-queueing-cpu-sel-wrapping-fix.patch b/2400_wireguard-queueing-cpu-sel-wrapping-fix.patch
new file mode 100644
index 00000000..fa199039
--- /dev/null
+++ b/2400_wireguard-queueing-cpu-sel-wrapping-fix.patch
@@ -0,0 +1,116 @@
+From 7387943fa35516f6f8017a3b0e9ce48a3bef9faa Mon Sep 17 00:00:00 2001
+From: "Jason A. Donenfeld" <ja...@zx2c4.com>
+Date: Mon, 3 Jul 2023 03:27:04 +0200
+Subject: wireguard: queueing: use saner cpu selection wrapping
+
+Using `% nr_cpumask_bits` is slow and complicated, and not totally
+robust toward dynamic changes to CPU topologies. Rather than storing the
+next CPU in the round-robin, just store the last one, and also return
+that value. This simplifies the loop drastically into a much more common
+pattern.
+
+Fixes: e7096c131e51 ("net: WireGuard secure network tunnel")
+Cc: sta...@vger.kernel.org
+Reported-by: Linus Torvalds <torva...@linux-foundation.org>
+Tested-by: Manuel Leiner <manuel.lei...@gmx.de>
+Signed-off-by: Jason A. Donenfeld <ja...@zx2c4.com>
+Signed-off-by: David S. Miller <da...@davemloft.net>
+---
+ drivers/net/wireguard/queueing.c |  1 +
+ drivers/net/wireguard/queueing.h | 25 +++++++++++--------------
+ drivers/net/wireguard/receive.c  |  2 +-
+ drivers/net/wireguard/send.c     |  2 +-
+ 4 files changed, 14 insertions(+), 16 deletions(-)
+
+diff --git a/drivers/net/wireguard/queueing.c b/drivers/net/wireguard/queueing.c
+index 8084e7408c0ae..26d235d152352 100644
+--- a/drivers/net/wireguard/queueing.c
++++ b/drivers/net/wireguard/queueing.c
+@@ -28,6 +28,7 @@ int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function,
+       int ret;
+ 
+       memset(queue, 0, sizeof(*queue));
++      queue->last_cpu = -1;
+       ret = ptr_ring_init(&queue->ring, len, GFP_KERNEL);
+       if (ret)
+               return ret;
+diff --git a/drivers/net/wireguard/queueing.h b/drivers/net/wireguard/queueing.h
+index 125284b346a77..1ea4f874e367e 100644
+--- a/drivers/net/wireguard/queueing.h
++++ b/drivers/net/wireguard/queueing.h
+@@ -117,20 +117,17 @@ static inline int wg_cpumask_choose_online(int *stored_cpu, unsigned int id)
+       return cpu;
+ }
+ 
+-/* This function is racy, in the sense that next is unlocked, so it could return
+- * the same CPU twice. A race-free version of this would be to instead store an
+- * atomic sequence number, do an increment-and-return, and then iterate through
+- * every possible CPU until we get to that index -- choose_cpu. However that's
+- * a bit slower, and it doesn't seem like this potential race actually
+- * introduces any performance loss, so we live with it.
++/* This function is racy, in the sense that it's called while last_cpu is
++ * unlocked, so it could return the same CPU twice. Adding locking or using
++ * atomic sequence numbers is slower though, and the consequences of racing are
++ * harmless, so live with it.
+  */
+-static inline int wg_cpumask_next_online(int *next)
++static inline int wg_cpumask_next_online(int *last_cpu)
+ {
+-      int cpu = *next;
+-
+-      while (unlikely(!cpumask_test_cpu(cpu, cpu_online_mask)))
+-              cpu = cpumask_next(cpu, cpu_online_mask) % nr_cpumask_bits;
+-      *next = cpumask_next(cpu, cpu_online_mask) % nr_cpumask_bits;
++      int cpu = cpumask_next(*last_cpu, cpu_online_mask);
++      if (cpu >= nr_cpu_ids)
++              cpu = cpumask_first(cpu_online_mask);
++      *last_cpu = cpu;
+       return cpu;
+ }
+ 
+@@ -159,7 +156,7 @@ static inline void wg_prev_queue_drop_peeked(struct prev_queue *queue)
+ 
+ static inline int wg_queue_enqueue_per_device_and_peer(
+       struct crypt_queue *device_queue, struct prev_queue *peer_queue,
+-      struct sk_buff *skb, struct workqueue_struct *wq, int *next_cpu)
++      struct sk_buff *skb, struct workqueue_struct *wq)
+ {
+       int cpu;
+ 
+@@ -173,7 +170,7 @@ static inline int wg_queue_enqueue_per_device_and_peer(
+       /* Then we queue it up in the device queue, which consumes the
+        * packet as soon as it can.
+        */
+-      cpu = wg_cpumask_next_online(next_cpu);
++      cpu = wg_cpumask_next_online(&device_queue->last_cpu);
+       if (unlikely(ptr_ring_produce_bh(&device_queue->ring, skb)))
+               return -EPIPE;
+       queue_work_on(cpu, wq, &per_cpu_ptr(device_queue->worker, cpu)->work);
+diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c
+index 7135d51d2d872..0b3f0c8435509 100644
+--- a/drivers/net/wireguard/receive.c
++++ b/drivers/net/wireguard/receive.c
+@@ -524,7 +524,7 @@ static void wg_packet_consume_data(struct wg_device *wg, struct sk_buff *skb)
+               goto err;
+ 
+      ret = wg_queue_enqueue_per_device_and_peer(&wg->decrypt_queue, &peer->rx_queue, skb,
+-                                                 wg->packet_crypt_wq, &wg->decrypt_queue.last_cpu);
++                                                 wg->packet_crypt_wq);
+       if (unlikely(ret == -EPIPE))
+               wg_queue_enqueue_per_peer_rx(skb, PACKET_STATE_DEAD);
+       if (likely(!ret || ret == -EPIPE)) {
+diff --git a/drivers/net/wireguard/send.c b/drivers/net/wireguard/send.c
+index 5368f7c35b4bf..95c853b59e1da 100644
+--- a/drivers/net/wireguard/send.c
++++ b/drivers/net/wireguard/send.c
+@@ -318,7 +318,7 @@ static void wg_packet_create_data(struct wg_peer *peer, struct sk_buff *first)
+               goto err;
+ 
+      ret = wg_queue_enqueue_per_device_and_peer(&wg->encrypt_queue, &peer->tx_queue, first,
+-                                                 wg->packet_crypt_wq, &wg->encrypt_queue.last_cpu);
++                                                 wg->packet_crypt_wq);
+       if (unlikely(ret == -EPIPE))
+               wg_queue_enqueue_per_peer_tx(first, PACKET_STATE_DEAD);
+ err:
+-- 
+cgit 

Reply via email to