On 05/18/2018 06:34 AM, Jesper Dangaard Brouer wrote:
> Notice how this allows us to get XDP statistics without affecting XDP
> performance, as the tracepoint is no longer activated on a per-packet basis.
>
> Signed-off-by: Jesper Dangaard Brouer <bro...@redhat.com>
> ---
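Moving the accounting from per-packet to per bulk flush is the key point here.
Just to spell it out for readers who skim, below is an untested, stand-alone
sketch of the difference; none of the identifiers are from this patch, they
are placeholders I made up (struct pkt, send_one() and emit_trace() stand in
for the real frames, the driver xmit call and the new trace_xdp_devmap_xmit()
tracepoint):

/*
 * Untested illustration only -- not code from this patch.
 */
struct pkt { int ok; };

static int send_one(struct pkt *p) { return p->ok ? 0 : -1; }
static void emit_trace(int sent, int drops) { (void)sent; (void)drops; }

/* Per-packet style: the tracepoint is activated for every frame. */
static void flush_per_packet(struct pkt **q, int n)
{
        int i;

        for (i = 0; i < n; i++) {
                int err = send_one(q[i]);

                emit_trace(!err, !!err);  /* n tracepoint hits per bulk */
        }
}

/* Per-bulk style (this patch): counters accumulate in the loop and the
 * tracepoint fires once per flush, so its cost is amortized over up to
 * DEV_MAP_BULK_SIZE (16) frames.
 */
static void flush_per_bulk(struct pkt **q, int n)
{
        int i, sent = 0, drops = 0;

        for (i = 0; i < n; i++) {
                if (send_one(q[i]))
                        drops++;
                sent++;         /* mirrors the patch; see question below */
        }
        emit_trace(sent, drops);  /* one tracepoint hit per bulk */
}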
[...]
>  #include <trace/define_trace.h>
> diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
> index cab72c100bb5..6f84100723b0 100644
> --- a/kernel/bpf/devmap.c
> +++ b/kernel/bpf/devmap.c
> @@ -50,6 +50,7 @@
>  #include <linux/bpf.h>
>  #include <net/xdp.h>
>  #include <linux/filter.h>
> +#include <trace/events/xdp.h>
>
>  #define DEV_CREATE_FLAG_MASK \
>          (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
> @@ -57,6 +58,7 @@
>  #define DEV_MAP_BULK_SIZE 16
>  struct xdp_bulk_queue {
>          struct xdp_frame *q[DEV_MAP_BULK_SIZE];
> +        struct net_device *dev_rx;
>          unsigned int count;
>  };
>
> @@ -219,8 +221,8 @@ void __dev_map_insert_ctx(struct bpf_map *map, u32 bit)
>  static int bq_xmit_all(struct bpf_dtab_netdev *obj,
>                         struct xdp_bulk_queue *bq)
>  {
> -        unsigned int processed = 0, drops = 0;
>          struct net_device *dev = obj->dev;
> +        int sent = 0, drops = 0;
>          int i;
>
>          if (unlikely(!bq->count))
> @@ -241,10 +243,13 @@ static int bq_xmit_all(struct bpf_dtab_netdev *obj,
>                          drops++;
>                          xdp_return_frame(xdpf);
>                  }
> -                processed++;
> +                sent++;

Do 'dropped' frames also get counted as 'sent' frames? This seems a bit
counter-intuitive to me. Should it be 'drops + sent = total frames' instead?
(There is a small sketch of what I mean at the end of this mail.)

>          }
>          bq->count = 0;
>
> +        trace_xdp_devmap_xmit(&obj->dtab->map, obj->bit,
> +                              sent, drops, bq->dev_rx, dev);
> +        bq->dev_rx = NULL;
>          return 0;
>  }
>
> @@ -301,18 +306,28 @@ struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
>  /* Runs under RCU-read-side, plus in softirq under NAPI protection.
>   * Thus, safe percpu variable access.
>   */
> -static int bq_enqueue(struct bpf_dtab_netdev *obj, struct xdp_frame *xdpf)
> +static int bq_enqueue(struct bpf_dtab_netdev *obj, struct xdp_frame *xdpf,
> +                      struct net_device *dev_rx)
> +
>  {
>          struct xdp_bulk_queue *bq = this_cpu_ptr(obj->bulkq);
>
>          if (unlikely(bq->count == DEV_MAP_BULK_SIZE))
>                  bq_xmit_all(obj, bq);
>
> +        /* Ingress dev_rx will be the same for all xdp_frame's in
> +         * bulk_queue, because bq stored per-CPU and must be flushed
> +         * from net_device drivers NAPI func end.
> +         */
> +        if (!bq->dev_rx)
> +                bq->dev_rx = dev_rx;
> +
>          bq->q[bq->count++] = xdpf;
>          return 0;
>  }
>
> -int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp)
> +int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
> +                    struct net_device *dev_rx)
>  {
>          struct net_device *dev = dst->dev;
>          struct xdp_frame *xdpf;
> @@ -325,7 +340,7 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp)
>          if (unlikely(!xdpf))
>                  return -EOVERFLOW;
>
> -        err = bq_enqueue(dst, xdpf);
> +        err = bq_enqueue(dst, xdpf, dev_rx);
>          if (err)
>                  return err;
>
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 1447ec94ef74..4a93423cc5ea 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -3063,7 +3063,7 @@ static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
>          case BPF_MAP_TYPE_DEVMAP: {
>                  struct bpf_dtab_netdev *dst = fwd;
>
> -                err = dev_map_enqueue(dst, xdp);
> +                err = dev_map_enqueue(dst, xdp, dev_rx);
>                  if (err)
>                          return err;
>                  __dev_map_insert_ctx(map, index);
>
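For what it's worth, here is an untested sketch of the accounting I have in
mind for the loop in bq_xmit_all(); the ndo_xdp_xmit() line is my guess at the
per-frame xmit call that sits just outside the quoted context, so treat it as
an assumption rather than a quote of the patch:

        /* Suggested accounting: a frame counts either as sent or as
         * dropped, so sent + drops == bq->count after the loop.
         */
        for (i = 0; i < bq->count; i++) {
                struct xdp_frame *xdpf = bq->q[i];
                int err;

                /* assumed per-frame xmit, not visible in the quoted hunk */
                err = dev->netdev_ops->ndo_xdp_xmit(dev, xdpf);
                if (err) {
                        drops++;
                        xdp_return_frame(xdpf);
                } else {
                        sent++;
                }
        }

That keeps sent + drops equal to the number of bulked frames, so consumers of
trace_xdp_devmap_xmit() do not have to work out the total themselves.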