On 05/18/2018 03:34 PM, Jesper Dangaard Brouer wrote:
> Like cpumap, create a queue for xdp frames that will be bulked.  For now,
> this patch simply invokes ndo_xdp_xmit for each frame.  This happens
> either when the map flush operation is invoked, or when the limit
> DEV_MAP_BULK_SIZE is reached.
> 
> Signed-off-by: Jesper Dangaard Brouer <bro...@redhat.com>
> ---
>  kernel/bpf/devmap.c |   77 ++++++++++++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 73 insertions(+), 4 deletions(-)
> 
> diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
> index 808808bf2bf2..cab72c100bb5 100644
> --- a/kernel/bpf/devmap.c
> +++ b/kernel/bpf/devmap.c
> @@ -54,11 +54,18 @@
>  #define DEV_CREATE_FLAG_MASK \
>       (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
>  
> +#define DEV_MAP_BULK_SIZE 16
> +struct xdp_bulk_queue {
> +     struct xdp_frame *q[DEV_MAP_BULK_SIZE];
> +     unsigned int count;
> +};
> +
>  /* objects in the map */
>  struct bpf_dtab_netdev {
>       struct net_device *dev; /* must be first member, due to tracepoint */
>       struct bpf_dtab *dtab;
>       unsigned int bit;
> +     struct xdp_bulk_queue __percpu *bulkq;
>       struct rcu_head rcu;
>  };
>  
> @@ -209,6 +216,38 @@ void __dev_map_insert_ctx(struct bpf_map *map, u32 bit)
>       __set_bit(bit, bitmap);
>  }
>  
> +static int bq_xmit_all(struct bpf_dtab_netdev *obj,
> +                      struct xdp_bulk_queue *bq)
> +{
> +     unsigned int processed = 0, drops = 0;
> +     struct net_device *dev = obj->dev;
> +     int i;
> +
> +     if (unlikely(!bq->count))
> +             return 0;
> +
> +     for (i = 0; i < bq->count; i++) {
> +             struct xdp_frame *xdpf = bq->q[i];
> +
> +             prefetch(xdpf);
> +     }
> +
> +     for (i = 0; i < bq->count; i++) {
> +             struct xdp_frame *xdpf = bq->q[i];
> +             int err;
> +
> +             err = dev->netdev_ops->ndo_xdp_xmit(dev, xdpf);
> +             if (err) {
> +                     drops++;
> +                     xdp_return_frame(xdpf);
> +             }
> +             processed++;

This sort of thing makes it really hard to review. 'processed' and
'drops' are not read anywhere in this function. So I need to go and
check all the other patches to see whether there's further logic
involved here or not. I had to review your series after applying all
patches in a local branch; please never do this. Add the logic in a
patch where it's self-contained and obvious to review.
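
If these counters are meant to feed the devmap tracepoint, then wiring
that up in the same patch would make it self-contained, e.g. something
like this at the end of bq_xmit_all() (just a sketch, assuming the
series adds a trace_xdp_devmap_xmit tracepoint taking the map, index
and counters):

	/* Consume the counters in the same patch that introduces
	 * them, so the accounting is obvious to review.
	 */
	trace_xdp_devmap_xmit(&obj->dtab->map, obj->bit,
			      processed, drops);

That way 'processed' and 'drops' are actually read in the function that
computes them.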

> +     }
> +     bq->count = 0;
> +
> +     return 0;
> +}
> +
>  /* __dev_map_flush is called from xdp_do_flush_map() which _must_ be signaled
>   * from the driver before returning from its napi->poll() routine. The poll()
>   * routine is called either from busy_poll context or net_rx_action signaled
> @@ -224,6 +263,7 @@ void __dev_map_flush(struct bpf_map *map)
>  
>       for_each_set_bit(bit, bitmap, map->max_entries) {
>               struct bpf_dtab_netdev *dev = READ_ONCE(dtab->netdev_map[bit]);
> +             struct xdp_bulk_queue *bq;
>               struct net_device *netdev;
>  
>               /* This is possible if the dev entry is removed by user space
> @@ -233,6 +273,9 @@ void __dev_map_flush(struct bpf_map *map)
>                       continue;
>  
>               __clear_bit(bit, bitmap);
> +
> +             bq = this_cpu_ptr(dev->bulkq);
> +             bq_xmit_all(dev, bq);
>               netdev = dev->dev;
>               if (likely(netdev->netdev_ops->ndo_xdp_flush))
>                       netdev->netdev_ops->ndo_xdp_flush(netdev);
> @@ -255,6 +298,20 @@ struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
>       return obj;
>  }
>  
> +/* Runs under RCU-read-side, plus in softirq under NAPI protection.
> + * Thus, safe percpu variable access.
> + */
> +static int bq_enqueue(struct bpf_dtab_netdev *obj, struct xdp_frame *xdpf)
> +{
> +     struct xdp_bulk_queue *bq = this_cpu_ptr(obj->bulkq);
> +
> +     if (unlikely(bq->count == DEV_MAP_BULK_SIZE))
> +             bq_xmit_all(obj, bq);
> +
> +     bq->q[bq->count++] = xdpf;
> +     return 0;
> +}
> +
>  int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp)
>  {
>       struct net_device *dev = dst->dev;
> @@ -268,8 +325,7 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp)
>       if (unlikely(!xdpf))
>               return -EOVERFLOW;
>  
> -     /* TODO: implement a bulking/enqueue step later */
> -     err = dev->netdev_ops->ndo_xdp_xmit(dev, xdpf);
> +     err = bq_enqueue(dst, xdpf);
>       if (err)
>               return err;
>  
> @@ -288,13 +344,18 @@ static void dev_map_flush_old(struct bpf_dtab_netdev *dev)
>  {
>       if (dev->dev->netdev_ops->ndo_xdp_flush) {
>               struct net_device *fl = dev->dev;
> +             struct xdp_bulk_queue *bq;
>               unsigned long *bitmap;
> +
>               int cpu;

Please remove the stray newline above.

>               for_each_online_cpu(cpu) {
>                       bitmap = per_cpu_ptr(dev->dtab->flush_needed, cpu);
>                       __clear_bit(dev->bit, bitmap);
>  
> +                     bq = per_cpu_ptr(dev->bulkq, cpu);
> +                     bq_xmit_all(dev, bq);
> +
>                       fl->netdev_ops->ndo_xdp_flush(dev->dev);
>               }
>       }
> @@ -306,6 +367,7 @@ static void __dev_map_entry_free(struct rcu_head *rcu)
>  
>       dev = container_of(rcu, struct bpf_dtab_netdev, rcu);
>       dev_map_flush_old(dev);
> +     free_percpu(dev->bulkq);
>       dev_put(dev->dev);
>       kfree(dev);
>  }
> @@ -338,6 +400,7 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
>  {
>       struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
>       struct net *net = current->nsproxy->net_ns;
> +     gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN;
>       struct bpf_dtab_netdev *dev, *old_dev;
>       u32 i = *(u32 *)key;
>       u32 ifindex = *(u32 *)value;
> @@ -352,11 +415,17 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
>       if (!ifindex) {
>               dev = NULL;
>       } else {
> -             dev = kmalloc_node(sizeof(*dev), GFP_ATOMIC | __GFP_NOWARN,
> -                                map->numa_node);
> +             dev = kmalloc_node(sizeof(*dev), gfp, map->numa_node);
>               if (!dev)
>                       return -ENOMEM;
>  
> +             dev->bulkq = __alloc_percpu_gfp(sizeof(*dev->bulkq),
> +                                             sizeof(void *), gfp);
> +             if (!dev->bulkq) {
> +                     kfree(dev);
> +                     return -ENOMEM;
> +             }
> +
>               dev->dev = dev_get_by_index(net, ifindex);
>               if (!dev->dev) {
>                       kfree(dev);

Ahh well, and I just realized that here you are leaking memory in the error 
path. :(

A free_percpu(dev->bulkq) is missing.
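
I.e. the dev_get_by_index() error path needs to release the percpu
bulk queue as well, roughly:

	dev->dev = dev_get_by_index(net, ifindex);
	if (!dev->dev) {
		/* Must undo the __alloc_percpu_gfp() from just above,
		 * otherwise dev->bulkq is leaked on this error path.
		 */
		free_percpu(dev->bulkq);
		kfree(dev);
		return -ENOMEM;
	}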

Please fix this bug up and send a fresh series, so we can fix this right away
without needing a follow-up in bpf-next.

Thanks,
Daniel
