Add tun_wake_queue() to tun.c and export it for use by vhost-net. The
function validates that the file belongs to a tun/tap device,
dereferences the tun_struct under RCU, and delegates to
__tun_wake_queue().

vhost_net_buf_produce() now calls tun_wake_queue() after a successful
batched consume of the ring to allow the netdev subqueue to be woken up.

Without the corresponding queue stopping (introduced in a subsequent
commit), this patch alone causes a slight throughput regression for a
tap+vhost-net setup sending to a QEMU VM: from 3.948 Mpps to
3.888 Mpps (-1.5%).

Test setup: AMD Ryzen 5 5600X at 4.3 GHz, 3200 MHz RAM, isolated QEMU
threads, XDP drop program active in the VM, pktgen sender. Results are
averaged over 20 runs of 100,000,000 packets each. SRSO and Spectre v2
mitigations were disabled.

Co-developed-by: Tim Gebauer <[email protected]>
Signed-off-by: Tim Gebauer <[email protected]>
Signed-off-by: Simon Schippers <[email protected]>
---
 drivers/net/tun.c      | 21 +++++++++++++++++++++
 drivers/vhost/net.c    | 15 +++++++++++----
 include/linux/if_tun.h |  3 +++
 3 files changed, 35 insertions(+), 4 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index a82d665dab5f..b86582cc6cb6 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -3760,6 +3760,27 @@ struct ptr_ring *tun_get_tx_ring(struct file *file)
 }
 EXPORT_SYMBOL_GPL(tun_get_tx_ring);
 
+void tun_wake_queue(struct file *file)
+{
+       struct tun_file *tfile;
+       struct tun_struct *tun;
+
+       if (file->f_op != &tun_fops)
+               return;
+       tfile = file->private_data;
+       if (!tfile)
+               return;
+
+       rcu_read_lock();
+
+       tun = rcu_dereference(tfile->tun);
+       if (tun)
+               __tun_wake_queue(tun, tfile);
+
+       rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(tun_wake_queue);
+
 module_init(tun_init);
 module_exit(tun_cleanup);
 MODULE_DESCRIPTION(DRV_DESCRIPTION);
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 80965181920c..c8ef804ef28c 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -176,13 +176,19 @@ static void *vhost_net_buf_consume(struct vhost_net_buf *rxq)
        return ret;
 }
 
-static int vhost_net_buf_produce(struct vhost_net_virtqueue *nvq)
+static int vhost_net_buf_produce(struct sock *sk,
+                                struct vhost_net_virtqueue *nvq)
 {
+       struct file *file = sk->sk_socket->file;
        struct vhost_net_buf *rxq = &nvq->rxq;
 
        rxq->head = 0;
        rxq->tail = ptr_ring_consume_batched(nvq->rx_ring, rxq->queue,
                                              VHOST_NET_BATCH);
+
+       if (rxq->tail)
+               tun_wake_queue(file);
+
        return rxq->tail;
 }
 
@@ -209,14 +215,15 @@ static int vhost_net_buf_peek_len(void *ptr)
        return __skb_array_len_with_tag(ptr);
 }
 
-static int vhost_net_buf_peek(struct vhost_net_virtqueue *nvq)
+static int vhost_net_buf_peek(struct sock *sk,
+                             struct vhost_net_virtqueue *nvq)
 {
        struct vhost_net_buf *rxq = &nvq->rxq;
 
        if (!vhost_net_buf_is_empty(rxq))
                goto out;
 
-       if (!vhost_net_buf_produce(nvq))
+       if (!vhost_net_buf_produce(sk, nvq))
                return 0;
 
 out:
@@ -995,7 +1002,7 @@ static int peek_head_len(struct vhost_net_virtqueue *rvq, struct sock *sk)
        unsigned long flags;
 
        if (rvq->rx_ring)
-               return vhost_net_buf_peek(rvq);
+               return vhost_net_buf_peek(sk, rvq);
 
        spin_lock_irqsave(&sk->sk_receive_queue.lock, flags);
        head = skb_peek(&sk->sk_receive_queue);
diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h
index 80166eb62f41..ab3b4ebca059 100644
--- a/include/linux/if_tun.h
+++ b/include/linux/if_tun.h
@@ -22,6 +22,7 @@ struct tun_msg_ctl {
 #if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE)
 struct socket *tun_get_socket(struct file *);
 struct ptr_ring *tun_get_tx_ring(struct file *file);
+void tun_wake_queue(struct file *file);
 
 static inline bool tun_is_xdp_frame(void *ptr)
 {
@@ -55,6 +56,8 @@ static inline struct ptr_ring *tun_get_tx_ring(struct file *f)
        return ERR_PTR(-EINVAL);
 }
 
+static inline void tun_wake_queue(struct file *f) {}
+
 static inline bool tun_is_xdp_frame(void *ptr)
 {
        return false;
-- 
2.43.0


Reply via email to