On 05/18/2018 06:34 AM, Jesper Dangaard Brouer wrote:
> Notice how this allows us to get XDP statistics without affecting XDP
> performance, as the tracepoint is no longer activated on a per-packet basis.
>
> Signed-off-by: Jesper Dangaard Brouer <bro...@redhat.com>
> ---
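Moving the accounting from per-packet to per bulk flush is the key point here.
Just to spell it out for readers who skim, below is an untested, stand-alone
sketch of the difference; none of the identifiers are from this patch, they
are placeholders I made up (struct pkt, send_one() and emit_trace() stand in
for the real frames, the driver xmit call and the new trace_xdp_devmap_xmit()
tracepoint):

/*
 * Untested illustration only -- not code from this patch.
 */
struct pkt { int ok; };

static int send_one(struct pkt *p) { return p->ok ? 0 : -1; }
static void emit_trace(int sent, int drops) { (void)sent; (void)drops; }

/* Per-packet style: the tracepoint is activated for every frame. */
static void flush_per_packet(struct pkt **q, int n)
{
        int i;

        for (i = 0; i < n; i++) {
                int err = send_one(q[i]);

                emit_trace(!err, !!err);  /* n tracepoint hits per bulk */
        }
}

/* Per-bulk style (this patch): counters accumulate in the loop and the
 * tracepoint fires once per flush, so its cost is amortized over up to
 * DEV_MAP_BULK_SIZE (16) frames.
 */
static void flush_per_bulk(struct pkt **q, int n)
{
        int i, sent = 0, drops = 0;

        for (i = 0; i < n; i++) {
                if (send_one(q[i]))
                        drops++;
                sent++;         /* mirrors the patch; see question below */
        }
        emit_trace(sent, drops);  /* one tracepoint hit per bulk */
}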
[...]
>  #include <trace/define_trace.h>
> diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
> index cab72c100bb5..6f84100723b0 100644
> --- a/kernel/bpf/devmap.c
> +++ b/kernel/bpf/devmap.c
> @@ -50,6 +50,7 @@
>  #include <linux/bpf.h>
>  #include <net/xdp.h>
>  #include <linux/filter.h>
> +#include <trace/events/xdp.h>
>
>  #define DEV_CREATE_FLAG_MASK \
>          (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
> @@ -57,6 +58,7 @@
>  #define DEV_MAP_BULK_SIZE 16
>  struct xdp_bulk_queue {
>          struct xdp_frame *q[DEV_MAP_BULK_SIZE];
> +        struct net_device *dev_rx;
>          unsigned int count;
>  };
>
> @@ -219,8 +221,8 @@ void __dev_map_insert_ctx(struct bpf_map *map, u32 bit)
>  static int bq_xmit_all(struct bpf_dtab_netdev *obj,
>                         struct xdp_bulk_queue *bq)
>  {
> -        unsigned int processed = 0, drops = 0;
>          struct net_device *dev = obj->dev;
> +        int sent = 0, drops = 0;
>          int i;
>
>          if (unlikely(!bq->count))
> @@ -241,10 +243,13 @@ static int bq_xmit_all(struct bpf_dtab_netdev *obj,
>                          drops++;
>                          xdp_return_frame(xdpf);
>                  }
> -                processed++;
> +                sent++;

Do 'dropped' frames also get counted as 'sent' frames? This seems a bit
counter-intuitive to me. Should it be 'drops + sent = total frames' instead?
(There is a small sketch of what I mean at the end of this mail.)

>          }
>          bq->count = 0;
>
> +        trace_xdp_devmap_xmit(&obj->dtab->map, obj->bit,
> +                              sent, drops, bq->dev_rx, dev);
> +        bq->dev_rx = NULL;
>          return 0;
>  }
>
> @@ -301,18 +306,28 @@ struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
>  /* Runs under RCU-read-side, plus in softirq under NAPI protection.
>   * Thus, safe percpu variable access.
>   */
> -static int bq_enqueue(struct bpf_dtab_netdev *obj, struct xdp_frame *xdpf)
> +static int bq_enqueue(struct bpf_dtab_netdev *obj, struct xdp_frame *xdpf,
> +                      struct net_device *dev_rx)
> +
>  {
>          struct xdp_bulk_queue *bq = this_cpu_ptr(obj->bulkq);
>
>          if (unlikely(bq->count == DEV_MAP_BULK_SIZE))
>                  bq_xmit_all(obj, bq);
>
> +        /* Ingress dev_rx will be the same for all xdp_frame's in
> +         * bulk_queue, because bq stored per-CPU and must be flushed
> +         * from net_device drivers NAPI func end.
> +         */
> +        if (!bq->dev_rx)
> +                bq->dev_rx = dev_rx;
> +
>          bq->q[bq->count++] = xdpf;
>          return 0;
>  }
>
> -int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp)
> +int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
> +                    struct net_device *dev_rx)
>  {
>          struct net_device *dev = dst->dev;
>          struct xdp_frame *xdpf;
> @@ -325,7 +340,7 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp)
>          if (unlikely(!xdpf))
>                  return -EOVERFLOW;
>
> -        err = bq_enqueue(dst, xdpf);
> +        err = bq_enqueue(dst, xdpf, dev_rx);
>          if (err)
>                  return err;
>
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 1447ec94ef74..4a93423cc5ea 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -3063,7 +3063,7 @@ static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
>          case BPF_MAP_TYPE_DEVMAP: {
>                  struct bpf_dtab_netdev *dst = fwd;
>
> -                err = dev_map_enqueue(dst, xdp);
> +                err = dev_map_enqueue(dst, xdp, dev_rx);
>                  if (err)
>                          return err;
>                  __dev_map_insert_ctx(map, index);
>
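For what it's worth, here is an untested sketch of the accounting I have in
mind for the loop in bq_xmit_all(); the ndo_xdp_xmit() line is my guess at the
per-frame xmit call that sits just outside the quoted context, so treat it as
an assumption rather than a quote of the patch:

        /* Suggested accounting: a frame counts either as sent or as
         * dropped, so sent + drops == bq->count after the loop.
         */
        for (i = 0; i < bq->count; i++) {
                struct xdp_frame *xdpf = bq->q[i];
                int err;

                /* assumed per-frame xmit, not visible in the quoted hunk */
                err = dev->netdev_ops->ndo_xdp_xmit(dev, xdpf);
                if (err) {
                        drops++;
                        xdp_return_frame(xdpf);
                } else {
                        sent++;
                }
        }

That keeps sent + drops equal to the number of bulked frames, so consumers of
trace_xdp_devmap_xmit() do not have to work out the total themselves.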