Add support for XDP adjust head by allocating a 256B header region
that XDP programs can grow into. This is only enabled when an XDP
program is loaded.

In order to ensure that we do not have to unwind the queue headroom
change, push the queue setup below bpf_prog_add. Unwinding a prog ref
reads better than making another queue setup call.

TBD: merge with Jason Wang's fixes and do a bit more testing; note
    that big_packet support is removed by Jason's series as well.

Signed-off-by: John Fastabend <john.r.fastab...@intel.com>
---
 drivers/net/virtio_net.c |   53 +++++++++++++++++++++++++++++++---------------
 1 file changed, 36 insertions(+), 17 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 08327e0..1a93158 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -159,6 +159,9 @@ struct virtnet_info {
        /* Ethtool settings */
        u8 duplex;
        u32 speed;
+
+       /* Headroom allocated in RX Queue */
+       unsigned int headroom;
 };
 
 struct padded_vnet_hdr {
@@ -392,6 +395,7 @@ static u32 do_xdp_prog(struct virtnet_info *vi,
        else
                hdr_padded_len = sizeof(struct padded_vnet_hdr);
 
+       xdp.data_hard_start = buf - vi->headroom;
        xdp.data = buf + hdr_padded_len;
        xdp.data_end = xdp.data + (len - vi->hdr_len);
 
@@ -430,9 +434,12 @@ static struct sk_buff *receive_big(struct net_device *dev,
                                   void *buf,
                                   unsigned int len)
 {
-       struct bpf_prog *xdp_prog;
        struct page *page = buf;
+       struct bpf_prog *xdp_prog;
        struct sk_buff *skb;
+       int offset;
+
+       offset = vi->headroom;
 
        rcu_read_lock();
        xdp_prog = rcu_dereference(rq->xdp_prog);
@@ -442,7 +449,7 @@ static struct sk_buff *receive_big(struct net_device *dev,
 
                if (unlikely(hdr->hdr.gso_type || hdr->hdr.flags))
                        goto err_xdp;
-               act = do_xdp_prog(vi, rq, xdp_prog, page, 0, len);
+               act = do_xdp_prog(vi, rq, xdp_prog, page, offset, len);
                switch (act) {
                case XDP_PASS:
                        break;
@@ -456,7 +463,7 @@ static struct sk_buff *receive_big(struct net_device *dev,
        }
        rcu_read_unlock();
 
-       skb = page_to_skb(vi, rq, page, 0, len, PAGE_SIZE);
+       skb = page_to_skb(vi, rq, page, offset, len, PAGE_SIZE);
        if (unlikely(!skb))
                goto err;
 
@@ -797,10 +804,10 @@ static int add_recvbuf_big(struct virtnet_info *vi, 
struct receive_queue *rq,
 
        /* rq->sg[0], rq->sg[1] share the same page */
        /* a separated rq->sg[0] for header - required in case !any_header_sg */
-       sg_set_buf(&rq->sg[0], p, vi->hdr_len);
+       sg_set_buf(&rq->sg[0], p, vi->hdr_len + vi->headroom);
 
        /* rq->sg[1] for data packet, from offset */
-       offset = sizeof(struct padded_vnet_hdr);
+       offset = vi->headroom + sizeof(struct padded_vnet_hdr);
        sg_set_buf(&rq->sg[1], p + offset, PAGE_SIZE - offset);
 
        /* chain first in list head */
@@ -823,24 +830,27 @@ static unsigned int get_mergeable_buf_len(struct 
ewma_pkt_len *avg_pkt_len)
        return ALIGN(len, MERGEABLE_BUFFER_ALIGN);
 }
 
-static int add_recvbuf_mergeable(struct receive_queue *rq, gfp_t gfp)
+static int add_recvbuf_mergeable(struct virtnet_info *vi,
+                                struct receive_queue *rq, gfp_t gfp)
 {
        struct page_frag *alloc_frag = &rq->alloc_frag;
+       unsigned int headroom = vi->headroom;
        char *buf;
        unsigned long ctx;
        int err;
        unsigned int len, hole;
 
        len = get_mergeable_buf_len(&rq->mrg_avg_pkt_len);
-       if (unlikely(!skb_page_frag_refill(len, alloc_frag, gfp)))
+       if (unlikely(!skb_page_frag_refill(len + headroom, alloc_frag, gfp)))
                return -ENOMEM;
 
        buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
+       buf += headroom; /* advance address leaving hole at front of pkt */
        ctx = mergeable_buf_to_ctx(buf, len);
        get_page(alloc_frag->page);
-       alloc_frag->offset += len;
+       alloc_frag->offset += len + headroom;
        hole = alloc_frag->size - alloc_frag->offset;
-       if (hole < len) {
+       if (hole < len + headroom) {
                /* To avoid internal fragmentation, if there is very likely not
                 * enough space for another buffer, add the remaining space to
                 * the current buffer. This extra space is not included in
@@ -874,7 +884,7 @@ static bool try_fill_recv(struct virtnet_info *vi, struct 
receive_queue *rq,
        gfp |= __GFP_COLD;
        do {
                if (vi->mergeable_rx_bufs)
-                       err = add_recvbuf_mergeable(rq, gfp);
+                       err = add_recvbuf_mergeable(vi, rq, gfp);
                else if (vi->big_packets)
                        err = add_recvbuf_big(vi, rq, gfp);
                else
@@ -1669,12 +1679,15 @@ static void virtnet_init_settings(struct net_device 
*dev)
        .set_settings = virtnet_set_settings,
 };
 
+#define VIRTIO_XDP_HEADROOM 256
+
 static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog)
 {
        unsigned long int max_sz = PAGE_SIZE - sizeof(struct padded_vnet_hdr);
        struct virtnet_info *vi = netdev_priv(dev);
        struct bpf_prog *old_prog;
        u16 xdp_qp = 0, curr_qp;
+       unsigned int old_hr;
        int i, err;
 
        if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) ||
@@ -1704,19 +1717,25 @@ static int virtnet_xdp_set(struct net_device *dev, 
struct bpf_prog *prog)
                return -ENOMEM;
        }
 
+       old_hr = vi->headroom;
+       if (prog) {
+               prog = bpf_prog_add(prog, vi->max_queue_pairs - 1);
+               if (IS_ERR(prog))
+                       return PTR_ERR(prog);
+               vi->headroom = VIRTIO_XDP_HEADROOM;
+       } else {
+               vi->headroom = 0;
+       }
+
        err = virtnet_set_queues(vi, curr_qp + xdp_qp);
        if (err) {
                dev_warn(&dev->dev, "XDP Device queue allocation failure.\n");
+               vi->headroom = old_hr;
+               if (prog)
+                       bpf_prog_sub(prog, vi->max_queue_pairs - 1);
                return err;
        }
 
-       if (prog) {
-               prog = bpf_prog_add(prog, vi->max_queue_pairs - 1);
-               if (IS_ERR(prog)) {
-                       virtnet_set_queues(vi, curr_qp);
-                       return PTR_ERR(prog);
-               }
-       }
 
        vi->xdp_queue_pairs = xdp_qp;
        netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp);

Reply via email to