Deals with all the pre- and post-amble around the BPF program itself, which
is still called one packet at a time.
Involves some fiddly percpu variables to cope with XDP_REDIRECT handling.

Signed-off-by: Edward Cree <ec...@solarflare.com>
---
 include/linux/filter.h |  10 +++
 net/core/dev.c         | 165 +++++++++++++++++++++++++++++++++++++++++++------
 net/core/filter.c      |  10 +--
 3 files changed, 156 insertions(+), 29 deletions(-)
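
As a reading aid (not part of the patch): below is a condensed sketch of the
flow that do_xdp_list_generic() implements, using only names introduced by
this patch, with declarations, bounds checks, error paths and some arguments
elided.  The per-packet save/restore of the percpu redirect_info is the
"fiddly" part mentioned in the commit message.

        /* Stage 1: build an xdp_buff for each skb in the percpu xdp_work[]
         * array; packets that survive the prepare step are queued for the
         * program run, the rest keep the verdict prepare returned.
         */
        skb_queue_for_each(skb, list) {
                xw = (*xwa) + n++;
                act = netif_receive_generic_xdp_prepare(skb, &xw->xdp, ...);
                if (act)
                        xw->w.ret = act;
                else
                        list_add_tail(&xw->w.list, &xdp_list);
        }

        /* Stage 2: run the BPF program on every prepared packet, saving the
         * percpu redirect_info that a bpf_redirect() call may have written.
         */
        list_for_each_entry(bw, &xdp_list, list) {
                bw->ret = bpf_prog_run_xdp(xdp_prog, bw->ctx);
                bw->ri = *this_cpu_ptr(&redirect_info);
        }

        /* Stage 3: run the post-amble and act on each verdict; for
         * XDP_REDIRECT the redirect_info saved in stage 2 is restored so
         * xdp_do_generic_redirect() sees this packet's redirect target.
         */
        for (i = 0; i < n; i++) {
                xw = (*xwa) + i;
                act = netif_receive_generic_xdp_finish(xw->skb, &xw->xdp, ...);
                /* ... XDP_PASS / XDP_TX / XDP_REDIRECT / XDP_DROP ... */
        }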

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 20f2659dd829..75db6cbf78a3 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -820,6 +820,16 @@ static inline int __xdp_generic_ok_fwd_dev(struct sk_buff *skb,
        return 0;
 }
 
+struct redirect_info {
+       u32 ifindex;
+       u32 flags;
+       struct bpf_map *map;
+       struct bpf_map *map_to_flush;
+       unsigned long   map_owner;
+};
+
+DECLARE_PER_CPU(struct redirect_info, redirect_info);
+
 /* The pair of xdp_do_redirect and xdp_do_flush_map MUST be called in the
  * same cpu context. Further for best results no more than a single map
  * for the do_redirect/do_flush pair should be used. This limitation is
diff --git a/net/core/dev.c b/net/core/dev.c
index 11f80d4502b9..22cbd5314d56 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4015,15 +4015,14 @@ static struct netdev_rx_queue *netif_get_rxqueue(struct sk_buff *skb)
        return rxqueue;
 }
 
-static u32 netif_receive_generic_xdp(struct sk_buff *skb,
-                                    struct xdp_buff *xdp,
-                                    struct bpf_prog *xdp_prog)
+static u32 netif_receive_generic_xdp_prepare(struct sk_buff *skb,
+                                            struct xdp_buff *xdp,
+                                            void **orig_data,
+                                            void **orig_data_end,
+                                            u32 *mac_len)
 {
        struct netdev_rx_queue *rxqueue;
-       void *orig_data, *orig_data_end;
-       u32 metalen, act = XDP_DROP;
-       int hlen, off;
-       u32 mac_len;
+       int hlen;
 
        /* Reinjected packets coming from act_mirred or similar should
         * not get XDP generic processing.
@@ -4054,19 +4053,35 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
        /* The XDP program wants to see the packet starting at the MAC
         * header.
         */
-       mac_len = skb->data - skb_mac_header(skb);
-       hlen = skb_headlen(skb) + mac_len;
-       xdp->data = skb->data - mac_len;
+       *mac_len = skb->data - skb_mac_header(skb);
+       hlen = skb_headlen(skb) + *mac_len;
+       xdp->data = skb->data - *mac_len;
        xdp->data_meta = xdp->data;
        xdp->data_end = xdp->data + hlen;
        xdp->data_hard_start = skb->data - skb_headroom(skb);
-       orig_data_end = xdp->data_end;
-       orig_data = xdp->data;
+       *orig_data_end = xdp->data_end;
+       *orig_data = xdp->data;
 
        rxqueue = netif_get_rxqueue(skb);
        xdp->rxq = &rxqueue->xdp_rxq;
+       /* 0 is actually XDP_ABORTED, but here we use it to mean "go ahead
+        * and run the xdp program".
+        */
+       return 0;
+do_drop:
+       kfree_skb(skb);
+       return XDP_DROP;
+}
 
-       act = bpf_prog_run_xdp(xdp_prog, xdp);
+static u32 netif_receive_generic_xdp_finish(struct sk_buff *skb,
+                                           struct xdp_buff *xdp,
+                                           struct bpf_prog *xdp_prog,
+                                           void *orig_data,
+                                           void *orig_data_end,
+                                           u32 act, u32 mac_len)
+{
+       u32 metalen;
+       int off;
 
        off = xdp->data - orig_data;
        if (off > 0)
@@ -4082,7 +4097,6 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
        if (off != 0) {
                skb_set_tail_pointer(skb, xdp->data_end - xdp->data);
                skb->len -= off;
-
        }
 
        switch (act) {
@@ -4102,7 +4116,6 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
                trace_xdp_exception(skb->dev, xdp_prog, act);
                /* fall through */
        case XDP_DROP:
-       do_drop:
                kfree_skb(skb);
                break;
        }
@@ -4110,6 +4123,23 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
        return act;
 }
 
+static u32 netif_receive_generic_xdp(struct sk_buff *skb,
+                                    struct xdp_buff *xdp,
+                                    struct bpf_prog *xdp_prog)
+{
+       void *orig_data, *orig_data_end;
+       u32 act, mac_len;
+
+       act = netif_receive_generic_xdp_prepare(skb, xdp, &orig_data,
+                                               &orig_data_end, &mac_len);
+       if (act)
+               return act;
+       act = bpf_prog_run_xdp(xdp_prog, xdp);
+       return netif_receive_generic_xdp_finish(skb, xdp, xdp_prog,
+                                               orig_data, orig_data_end, act,
+                                               mac_len);
+}
+
 /* When doing generic XDP we have to bypass the qdisc layer and the
  * network taps in order to match in-driver-XDP behavior.
  */
@@ -4168,6 +4198,93 @@ int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb)
 }
 EXPORT_SYMBOL_GPL(do_xdp_generic);
 
+struct bpf_work {
+       struct list_head list;
+       void *ctx;
+       struct redirect_info ri;
+       unsigned long ret;
+};
+
+struct xdp_work {
+       struct bpf_work w;
+       struct xdp_buff xdp;
+       struct sk_buff *skb;
+       void *orig_data;
+       void *orig_data_end;
+       u32 mac_len;
+};
+
+/* Storage area for per-packet Generic XDP metadata */
+static DEFINE_PER_CPU(struct xdp_work[NAPI_POLL_WEIGHT], xdp_work);
+
+static void do_xdp_list_generic(struct bpf_prog *xdp_prog,
+                               struct sk_buff_head *list,
+                               struct sk_buff_head *pass_list)
+{
+       struct xdp_work (*xwa)[NAPI_POLL_WEIGHT], *xw;
+       struct bpf_work *bw;
+       struct sk_buff *skb;
+       LIST_HEAD(xdp_list);
+       int n = 0, i, err;
+       u32 act;
+
+       if (!xdp_prog) {
+               /* PASS everything */
+               skb_queue_splice_init(list, pass_list);
+               return;
+       }
+
+       xwa = this_cpu_ptr(&xdp_work);
+
+       skb_queue_for_each(skb, list) {
+               if (WARN_ON(n >= NAPI_POLL_WEIGHT))
+                        /* checked in caller, can't happen */
+                        return;
+               xw = (*xwa) + n++;
+               memset(xw, 0, sizeof(*xw));
+               xw->skb = skb;
+               xw->w.ctx = &xw->xdp;
+               act = netif_receive_generic_xdp_prepare(skb, &xw->xdp,
+                                                       &xw->orig_data,
+                                                       &xw->orig_data_end,
+                                                       &xw->mac_len);
+               if (act)
+                       xw->w.ret = act;
+               else
+                       list_add_tail(&xw->w.list, &xdp_list);
+       }
+
+       list_for_each_entry(bw, &xdp_list, list) {
+               bw->ret = bpf_prog_run_xdp(xdp_prog, bw->ctx);
+               bw->ri = *this_cpu_ptr(&redirect_info);
+       }
+
+       for (i = 0; i < n; i++) {
+               xw = (*xwa) + i;
+               act = netif_receive_generic_xdp_finish(xw->skb, &xw->xdp,
+                                                      xdp_prog, xw->orig_data,
+                                                      xw->orig_data_end,
+                                                      xw->w.ret, xw->mac_len);
+               if (act != XDP_PASS) {
+                       switch (act) {
+                       case XDP_REDIRECT:
+                               *this_cpu_ptr(&redirect_info) = xw->w.ri;
+                               err = xdp_do_generic_redirect(xw->skb->dev,
+                                                             xw->skb, &xw->xdp,
+                                                             xdp_prog);
+                               if (err) /* free and drop */
+                                       kfree_skb(xw->skb);
+                               break;
+                       case XDP_TX:
+                               generic_xdp_tx(xw->skb, xdp_prog);
+                               break;
+                       }
+               } else {
+                       __skb_queue_tail(pass_list, xw->skb);
+               }
+       }
+}
+
 static int netif_rx_internal(struct sk_buff *skb)
 {
        int ret;
@@ -4878,7 +4995,7 @@ static void netif_receive_skb_list_internal(struct sk_buff_head *list)
 {
        /* Two sublists so we can go back and forth between them */
        struct sk_buff_head sublist, sublist2;
-       struct bpf_prog *xdp_prog = NULL;
+       struct bpf_prog *xdp_prog = NULL, *curr_prog = NULL;
        struct sk_buff *skb;
 
        __skb_queue_head_init(&sublist);
@@ -4893,15 +5010,23 @@ static void netif_receive_skb_list_internal(struct sk_buff_head *list)
 
        __skb_queue_head_init(&sublist2);
        if (static_branch_unlikely(&generic_xdp_needed_key)) {
+               struct sk_buff_head sublist3;
+               int n = 0;
+
+               __skb_queue_head_init(&sublist3);
                preempt_disable();
                rcu_read_lock();
                while ((skb = __skb_dequeue(&sublist)) != NULL) {
                        xdp_prog = rcu_dereference(skb->dev->xdp_prog);
-                       if (do_xdp_generic(xdp_prog, skb) != XDP_PASS)
-                               /* Dropped, don't add to sublist */
-                               continue;
-                       __skb_queue_tail(&sublist2, skb);
+                       if (++n >= NAPI_POLL_WEIGHT || xdp_prog != curr_prog) {
+                               do_xdp_list_generic(curr_prog, &sublist3, &sublist2);
+                               __skb_queue_head_init(&sublist3);
+                               n = 0;
+                               curr_prog = xdp_prog;
+                       }
+                       __skb_queue_tail(&sublist3, skb);
                }
+               do_xdp_list_generic(curr_prog, &sublist3, &sublist2);
                rcu_read_unlock();
                preempt_enable();
                /* Move all packets onto first sublist */
diff --git a/net/core/filter.c b/net/core/filter.c
index e7f12e9f598c..c96aff14d76a 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2039,15 +2039,7 @@ static const struct bpf_func_proto bpf_clone_redirect_proto = {
        .arg3_type      = ARG_ANYTHING,
 };
 
-struct redirect_info {
-       u32 ifindex;
-       u32 flags;
-       struct bpf_map *map;
-       struct bpf_map *map_to_flush;
-       unsigned long   map_owner;
-};
-
-static DEFINE_PER_CPU(struct redirect_info, redirect_info);
+DEFINE_PER_CPU(struct redirect_info, redirect_info);
 
 BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags)
 {
