The per-CPU variables used during bpf_prog_run_xdp() invocation and later during xdp_do_redirect() rely on disabled BH for their protection. Without locking in local_bh_disable() on PREEMPT_RT, these data structures require explicit locking.
This is a follow-up to the previous change, which introduced bpf_run_lock.redirect_lock, and now uses it within drivers. The simple way is to acquire the lock before bpf_prog_run_xdp() is invoked and hold it until the end of the function. This does not always work because some drivers (cpsw, atlantic) invoke xdp_do_flush() in the same context. Acquiring the lock in bpf_prog_run_xdp() and dropping it in xdp_do_redirect() (without touching drivers) does not work because not all drivers that use bpf_prog_run_xdp() support XDP_REDIRECT (and invoke xdp_do_redirect()). Ideally the minimal locking scope would be bpf_prog_run_xdp() + xdp_do_redirect() and everything else (error recovery, DMA unmapping, free/alloc of memory, …) would happen outside of the locked section. Cc: "K. Y. Srinivasan" <k...@microsoft.com> Cc: "Michael S. Tsirkin" <m...@redhat.com> Cc: Alexei Starovoitov <a...@kernel.org> Cc: Andrii Nakryiko <and...@kernel.org> Cc: Dexuan Cui <de...@microsoft.com> Cc: Haiyang Zhang <haiya...@microsoft.com> Cc: Hao Luo <hao...@google.com> Cc: Jesper Dangaard Brouer <h...@kernel.org> Cc: Jiri Olsa <jo...@kernel.org> Cc: John Fastabend <john.fastab...@gmail.com> Cc: Juergen Gross <jgr...@suse.com> Cc: KP Singh <kpsi...@kernel.org> Cc: Martin KaFai Lau <martin....@linux.dev> Cc: Nikolay Aleksandrov <ra...@blackwall.org> Cc: Song Liu <s...@kernel.org> Cc: Stanislav Fomichev <s...@google.com> Cc: Stefano Stabellini <sstabell...@kernel.org> Cc: Wei Liu <wei....@kernel.org> Cc: Willem de Bruijn <willemdebruijn.ker...@gmail.com> Cc: Xuan Zhuo <xuanz...@linux.alibaba.com> Cc: Yonghong Song <yonghong.s...@linux.dev> Cc: b...@vger.kernel.org Cc: virtualizat...@lists.linux.dev Cc: xen-de...@lists.xenproject.org Signed-off-by: Sebastian Andrzej Siewior <bige...@linutronix.de> --- drivers/net/hyperv/netvsc_bpf.c | 1 + drivers/net/netkit.c | 13 +++++++---- drivers/net/tun.c | 28 +++++++++++++---------- drivers/net/veth.c | 40 ++++++++++++++++++++------------- drivers/net/virtio_net.c 
| 1 + drivers/net/xen-netfront.c | 1 + 6 files changed, 52 insertions(+), 32 deletions(-) diff --git a/drivers/net/hyperv/netvsc_bpf.c b/drivers/net/hyperv/netvsc_bpf.c index 4a9522689fa4f..55f8ca92ca199 100644 --- a/drivers/net/hyperv/netvsc_bpf.c +++ b/drivers/net/hyperv/netvsc_bpf.c @@ -58,6 +58,7 @@ u32 netvsc_run_xdp(struct net_device *ndev, struct netvsc_channel *nvchan, memcpy(xdp->data, data, len); + guard(local_lock_nested_bh)(&bpf_run_lock.redirect_lock); act = bpf_prog_run_xdp(prog, xdp); switch (act) { diff --git a/drivers/net/netkit.c b/drivers/net/netkit.c index 39171380ccf29..fbcf78477bda8 100644 --- a/drivers/net/netkit.c +++ b/drivers/net/netkit.c @@ -80,8 +80,15 @@ static netdev_tx_t netkit_xmit(struct sk_buff *skb, struct net_device *dev) netkit_prep_forward(skb, !net_eq(dev_net(dev), dev_net(peer))); skb->dev = peer; entry = rcu_dereference(nk->active); - if (entry) - ret = netkit_run(entry, skb, ret); + if (entry) { + scoped_guard(local_lock_nested_bh, &bpf_run_lock.redirect_lock) { + ret = netkit_run(entry, skb, ret); + if (ret == NETKIT_REDIRECT) { + dev_sw_netstats_tx_add(dev, 1, len); + skb_do_redirect(skb); + } + } + } switch (ret) { case NETKIT_NEXT: case NETKIT_PASS: @@ -95,8 +102,6 @@ static netdev_tx_t netkit_xmit(struct sk_buff *skb, struct net_device *dev) } break; case NETKIT_REDIRECT: - dev_sw_netstats_tx_add(dev, 1, len); - skb_do_redirect(skb); break; case NETKIT_DROP: default: diff --git a/drivers/net/tun.c b/drivers/net/tun.c index afa5497f7c35c..fe0d31f11e4b6 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1708,16 +1708,18 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun, xdp_init_buff(&xdp, buflen, &tfile->xdp_rxq); xdp_prepare_buff(&xdp, buf, pad, len, false); - act = bpf_prog_run_xdp(xdp_prog, &xdp); - if (act == XDP_REDIRECT || act == XDP_TX) { - get_page(alloc_frag->page); - alloc_frag->offset += buflen; - } - err = tun_xdp_act(tun, xdp_prog, &xdp, act); - if (err < 0) { - if (act == XDP_REDIRECT 
|| act == XDP_TX) - put_page(alloc_frag->page); - goto out; + scoped_guard(local_lock_nested_bh, &bpf_run_lock.redirect_lock) { + act = bpf_prog_run_xdp(xdp_prog, &xdp); + if (act == XDP_REDIRECT || act == XDP_TX) { + get_page(alloc_frag->page); + alloc_frag->offset += buflen; + } + err = tun_xdp_act(tun, xdp_prog, &xdp, act); + if (err < 0) { + if (act == XDP_REDIRECT || act == XDP_TX) + put_page(alloc_frag->page); + goto out; + } } if (err == XDP_REDIRECT) @@ -2460,8 +2462,10 @@ static int tun_xdp_one(struct tun_struct *tun, xdp_init_buff(xdp, buflen, &tfile->xdp_rxq); xdp_set_data_meta_invalid(xdp); - act = bpf_prog_run_xdp(xdp_prog, xdp); - ret = tun_xdp_act(tun, xdp_prog, xdp, act); + scoped_guard(local_lock_nested_bh, &bpf_run_lock.redirect_lock) { + act = bpf_prog_run_xdp(xdp_prog, xdp); + ret = tun_xdp_act(tun, xdp_prog, xdp, act); + } if (ret < 0) { put_page(virt_to_head_page(xdp->data)); return ret; diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 977861c46b1fe..c69e5ff9f8795 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -624,7 +624,18 @@ static struct xdp_frame *veth_xdp_rcv_one(struct veth_rq *rq, xdp->rxq = &rq->xdp_rxq; vxbuf.skb = NULL; - act = bpf_prog_run_xdp(xdp_prog, xdp); + scoped_guard(local_lock_nested_bh, &bpf_run_lock.redirect_lock) { + act = bpf_prog_run_xdp(xdp_prog, xdp); + if (act == XDP_REDIRECT) { + orig_frame = *frame; + xdp->rxq->mem = frame->mem; + if (xdp_do_redirect(rq->dev, xdp, xdp_prog)) { + frame = &orig_frame; + stats->rx_drops++; + goto err_xdp; + } + } + } switch (act) { case XDP_PASS: @@ -644,13 +655,6 @@ static struct xdp_frame *veth_xdp_rcv_one(struct veth_rq *rq, rcu_read_unlock(); goto xdp_xmit; case XDP_REDIRECT: - orig_frame = *frame; - xdp->rxq->mem = frame->mem; - if (xdp_do_redirect(rq->dev, xdp, xdp_prog)) { - frame = &orig_frame; - stats->rx_drops++; - goto err_xdp; - } stats->xdp_redirect++; rcu_read_unlock(); goto xdp_xmit; @@ -857,7 +861,18 @@ static struct sk_buff 
*veth_xdp_rcv_skb(struct veth_rq *rq, orig_data = xdp->data; orig_data_end = xdp->data_end; - act = bpf_prog_run_xdp(xdp_prog, xdp); + scoped_guard(local_lock_nested_bh, &bpf_run_lock.redirect_lock) { + act = bpf_prog_run_xdp(xdp_prog, xdp); + if (act == XDP_REDIRECT) { + veth_xdp_get(xdp); + consume_skb(skb); + xdp->rxq->mem = rq->xdp_mem; + if (xdp_do_redirect(rq->dev, xdp, xdp_prog)) { + stats->rx_drops++; + goto err_xdp; + } + } + } switch (act) { case XDP_PASS: @@ -875,13 +890,6 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq, rcu_read_unlock(); goto xdp_xmit; case XDP_REDIRECT: - veth_xdp_get(xdp); - consume_skb(skb); - xdp->rxq->mem = rq->xdp_mem; - if (xdp_do_redirect(rq->dev, xdp, xdp_prog)) { - stats->rx_drops++; - goto err_xdp; - } stats->xdp_redirect++; rcu_read_unlock(); goto xdp_xmit; diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index d16f592c2061f..5e362c4604239 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1010,6 +1010,7 @@ static int virtnet_xdp_handler(struct bpf_prog *xdp_prog, struct xdp_buff *xdp, int err; u32 act; + guard(local_lock_nested_bh)(&bpf_run_lock.redirect_lock); act = bpf_prog_run_xdp(xdp_prog, xdp); u64_stats_inc(&stats->xdp_packets); diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index ad29f370034e4..e3daa8cdeb84e 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -978,6 +978,7 @@ static u32 xennet_run_xdp(struct netfront_queue *queue, struct page *pdata, xdp_prepare_buff(xdp, page_address(pdata), XDP_PACKET_HEADROOM, len, false); + guard(local_lock_nested_bh)(&bpf_run_lock.redirect_lock); act = bpf_prog_run_xdp(prog, xdp); switch (act) { case XDP_TX: -- 2.43.0