From: Toshiaki Makita <makita.toshi...@lab.ntt.co.jp>

This allows further redirection of xdp_frames like:

 NIC   -> veth--veth -> veth--veth
 (XDP)          (XDP)         (XDP)

The intermediate XDP program, which redirects packets from the NIC to
the other veth, reuses the NIC's xdp_mem_info so that the NIC's page
recycling still works on the destination veth's XDP.
As a consequence, return_frame is not fully guarded by NAPI, since a
NAPI handler on another CPU may use the same xdp_mem_info concurrently.
Therefore, disable napi_direct with xdp_set_return_frame_no_direct()
while running in the NAPI context.

v4:
- Use xdp_[set|clear]_return_frame_no_direct() instead of a flag in
  xdp_mem_info.

v3:
- Fix double free when veth_xdp_tx() returns a positive value.
- Convert xdp_xmit and xdp_redir variables into flags.

Signed-off-by: Toshiaki Makita <makita.toshi...@lab.ntt.co.jp>
Signed-off-by: Toshiaki Makita <toshiaki.maki...@gmail.com>
---
 drivers/net/veth.c | 119 +++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 110 insertions(+), 9 deletions(-)

diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index acdb1c543f4b..60397a8ea2e9 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -32,6 +32,10 @@
 #define VETH_RING_SIZE         256
 #define VETH_XDP_HEADROOM      (XDP_PACKET_HEADROOM + NET_IP_ALIGN)
 
+/* Separating two types of XDP xmit */
+#define VETH_XDP_TX            BIT(0)
+#define VETH_XDP_REDIR         BIT(1)
+
 struct pcpu_vstats {
        u64                     packets;
        u64                     bytes;
@@ -45,6 +49,7 @@ struct veth_priv {
        struct bpf_prog         *_xdp_prog;
        struct net_device __rcu *peer;
        atomic64_t              dropped;
+       struct xdp_mem_info     xdp_mem;
        unsigned                requested_headroom;
        bool                    rx_notify_masked;
        struct ptr_ring         xdp_ring;
@@ -317,10 +322,42 @@ static int veth_xdp_xmit(struct net_device *dev, int n,
        return n - drops;
 }
 
+static void veth_xdp_flush(struct net_device *dev)
+{
+       struct veth_priv *rcv_priv, *priv = netdev_priv(dev);
+       struct net_device *rcv;
+
+       rcu_read_lock();
+       rcv = rcu_dereference(priv->peer);
+       if (unlikely(!rcv))
+               goto out;
+
+       rcv_priv = netdev_priv(rcv);
+       /* xdp_ring is initialized on receive side? */
+       if (unlikely(!rcu_access_pointer(rcv_priv->xdp_prog)))
+               goto out;
+
+       __veth_xdp_flush(rcv_priv);
+out:
+       rcu_read_unlock();
+}
+
+static int veth_xdp_tx(struct net_device *dev, struct xdp_buff *xdp)
+{
+       struct xdp_frame *frame = convert_to_xdp_frame(xdp);
+
+       if (unlikely(!frame))
+               return -EOVERFLOW;
+
+       return veth_xdp_xmit(dev, 1, &frame, 0);
+}
+
 static struct sk_buff *veth_xdp_rcv_one(struct veth_priv *priv,
-                                       struct xdp_frame *frame)
+                                       struct xdp_frame *frame,
+                                       unsigned int *xdp_xmit)
 {
        int len = frame->len, delta = 0;
+       struct xdp_frame orig_frame;
        struct bpf_prog *xdp_prog;
        unsigned int headroom;
        struct sk_buff *skb;
@@ -344,6 +381,29 @@ static struct sk_buff *veth_xdp_rcv_one(struct veth_priv *priv,
                        delta = frame->data - xdp.data;
                        len = xdp.data_end - xdp.data;
                        break;
+               case XDP_TX:
+                       orig_frame = *frame;
+                       xdp.data_hard_start = frame;
+                       xdp.rxq->mem = frame->mem;
+                       if (unlikely(veth_xdp_tx(priv->dev, &xdp) < 0)) {
+                               trace_xdp_exception(priv->dev, xdp_prog, act);
+                               frame = &orig_frame;
+                               goto err_xdp;
+                       }
+                       *xdp_xmit |= VETH_XDP_TX;
+                       rcu_read_unlock();
+                       goto xdp_xmit;
+               case XDP_REDIRECT:
+                       orig_frame = *frame;
+                       xdp.data_hard_start = frame;
+                       xdp.rxq->mem = frame->mem;
+                       if (xdp_do_redirect(priv->dev, &xdp, xdp_prog)) {
+                               frame = &orig_frame;
+                               goto err_xdp;
+                       }
+                       *xdp_xmit |= VETH_XDP_REDIR;
+                       rcu_read_unlock();
+                       goto xdp_xmit;
                default:
                        bpf_warn_invalid_xdp_action(act);
                case XDP_ABORTED:
@@ -368,12 +428,13 @@ static struct sk_buff *veth_xdp_rcv_one(struct veth_priv *priv,
 err_xdp:
        rcu_read_unlock();
        xdp_return_frame(frame);
-
+xdp_xmit:
        return NULL;
 }
 
 static struct sk_buff *veth_xdp_rcv_skb(struct veth_priv *priv,
-                                       struct sk_buff *skb)
+                                       struct sk_buff *skb,
+                                       unsigned int *xdp_xmit)
 {
        u32 pktlen, headroom, act, metalen;
        void *orig_data, *orig_data_end;
@@ -444,6 +505,26 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_priv *priv,
        switch (act) {
        case XDP_PASS:
                break;
+       case XDP_TX:
+               get_page(virt_to_page(xdp.data));
+               consume_skb(skb);
+               xdp.rxq->mem = priv->xdp_mem;
+               if (unlikely(veth_xdp_tx(priv->dev, &xdp) < 0)) {
+                       trace_xdp_exception(priv->dev, xdp_prog, act);
+                       goto err_xdp;
+               }
+               *xdp_xmit |= VETH_XDP_TX;
+               rcu_read_unlock();
+               goto xdp_xmit;
+       case XDP_REDIRECT:
+               get_page(virt_to_page(xdp.data));
+               consume_skb(skb);
+               xdp.rxq->mem = priv->xdp_mem;
+               if (xdp_do_redirect(priv->dev, &xdp, xdp_prog))
+                       goto err_xdp;
+               *xdp_xmit |= VETH_XDP_REDIR;
+               rcu_read_unlock();
+               goto xdp_xmit;
        default:
                bpf_warn_invalid_xdp_action(act);
        case XDP_ABORTED:
@@ -474,9 +555,15 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_priv *priv,
        rcu_read_unlock();
        kfree_skb(skb);
        return NULL;
+err_xdp:
+       rcu_read_unlock();
+       page_frag_free(xdp.data);
+xdp_xmit:
+       return NULL;
 }
 
-static int veth_xdp_rcv(struct veth_priv *priv, int budget)
+static int veth_xdp_rcv(struct veth_priv *priv, int budget,
+                       unsigned int *xdp_xmit)
 {
        int i, done = 0;
 
@@ -487,10 +574,12 @@ static int veth_xdp_rcv(struct veth_priv *priv, int budget)
                if (!ptr)
                        break;
 
-               if (veth_is_xdp_frame(ptr))
-                       skb = veth_xdp_rcv_one(priv, veth_ptr_to_xdp(ptr));
-               else
-                       skb = veth_xdp_rcv_skb(priv, ptr);
+               if (veth_is_xdp_frame(ptr)) {
+                       skb = veth_xdp_rcv_one(priv, veth_ptr_to_xdp(ptr),
+                                              xdp_xmit);
+               } else {
+                       skb = veth_xdp_rcv_skb(priv, ptr, xdp_xmit);
+               }
 
                if (skb)
                        napi_gro_receive(&priv->xdp_napi, skb);
@@ -505,9 +594,11 @@ static int veth_poll(struct napi_struct *napi, int budget)
 {
        struct veth_priv *priv =
                container_of(napi, struct veth_priv, xdp_napi);
+       unsigned int xdp_xmit = 0;
        int done;
 
-       done = veth_xdp_rcv(priv, budget);
+       xdp_set_return_frame_no_direct();
+       done = veth_xdp_rcv(priv, budget, &xdp_xmit);
 
        if (done < budget && napi_complete_done(napi, done)) {
                /* Write rx_notify_masked before reading ptr_ring */
@@ -518,6 +609,12 @@ static int veth_poll(struct napi_struct *napi, int budget)
                }
        }
 
+       if (xdp_xmit & VETH_XDP_TX)
+               veth_xdp_flush(priv->dev);
+       if (xdp_xmit & VETH_XDP_REDIR)
+               xdp_do_flush_map();
+       xdp_clear_return_frame_no_direct();
+
        return done;
 }
 
@@ -564,6 +661,9 @@ static int veth_enable_xdp(struct net_device *dev)
                err = veth_napi_add(dev);
                if (err)
                        goto err;
+
+               /* Save original mem info as it can be overwritten */
+               priv->xdp_mem = priv->xdp_rxq.mem;
        }
 
        rcu_assign_pointer(priv->xdp_prog, priv->_xdp_prog);
@@ -581,6 +681,7 @@ static void veth_disable_xdp(struct net_device *dev)
 
        rcu_assign_pointer(priv->xdp_prog, NULL);
        veth_napi_del(dev);
+       priv->xdp_rxq.mem = priv->xdp_mem;
        xdp_rxq_info_unreg(&priv->xdp_rxq);
 }
 
-- 
2.14.3

Reply via email to