[PATCH v2 net-next 2/2] bpf: add bpf_redirect() helper

2015-09-16 Thread Alexei Starovoitov
Existing bpf_clone_redirect() helper clones skb before redirecting
it to RX or TX of destination netdev.
Introduce bpf_redirect() helper that does that without cloning.

Benchmarked with two hosts using 10G ixgbe NICs.
One host is doing line rate pktgen.
Another host is configured as:
$ tc qdisc add dev $dev ingress
$ tc filter add dev $dev root pref 10 u32 match u32 0 0 flowid 1:2 \
   action bpf run object-file tcbpf1_kern.o section clone_redirect_xmit drop
so it receives the packet on $dev and immediately xmits it on $dev + 1
The section 'clone_redirect_xmit' in tcbpf1_kern.o file has the program
that does bpf_clone_redirect() and performance is 2.0 Mpps

$ tc filter add dev $dev root pref 10 u32 match u32 0 0 flowid 1:2 \
   action bpf run object-file tcbpf1_kern.o section redirect_xmit drop
which is using bpf_redirect() - 2.4 Mpps

and using cls_bpf with integrated actions as:
$ tc filter add dev $dev root pref 10 \
  bpf run object-file tcbpf1_kern.o section redirect_xmit integ_act classid 1
performance is 2.5 Mpps

To summarize:
u32+act_bpf using clone_redirect - 2.0 Mpps
u32+act_bpf using redirect - 2.4 Mpps
cls_bpf using redirect - 2.5 Mpps

For comparison linux bridge in this setup is doing 2.1 Mpps
and ixgbe rx + drop in ip_rcv - 7.8 Mpps

Signed-off-by: Alexei Starovoitov 
Acked-by: Daniel Borkmann 
---
This approach is using per_cpu scratch area to store ifindex and flags.
The other alternatives discussed at plumbers are slower and more intrusive.
v1->v2: dropped redundant iff_up check

 include/net/sch_generic.h|1 +
 include/uapi/linux/bpf.h |8 
 include/uapi/linux/pkt_cls.h |1 +
 net/core/dev.c   |8 
 net/core/filter.c|   44 ++
 net/sched/act_bpf.c  |1 +
 net/sched/cls_bpf.c  |1 +
 samples/bpf/bpf_helpers.h|4 
 samples/bpf/tcbpf1_kern.c|   24 ++-
 9 files changed, 91 insertions(+), 1 deletion(-)

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index da61febb9091..4c79ce8c1f92 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -402,6 +402,7 @@ void __qdisc_calculate_pkt_len(struct sk_buff *skb,
   const struct qdisc_size_table *stab);
 bool tcf_destroy(struct tcf_proto *tp, bool force);
 void tcf_destroy_chain(struct tcf_proto __rcu **fl);
+int skb_do_redirect(struct sk_buff *);
 
 /* Reset all TX qdiscs greater then index of a device.  */
 static inline void qdisc_reset_all_tx_gt(struct net_device *dev, unsigned int i)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 2fbd1c71fa3b..4ec0b5488294 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -272,6 +272,14 @@ enum bpf_func_id {
BPF_FUNC_skb_get_tunnel_key,
BPF_FUNC_skb_set_tunnel_key,
BPF_FUNC_perf_event_read,   /* u64 bpf_perf_event_read(&map, index) */
+   /**
+* bpf_redirect(ifindex, flags) - redirect to another netdev
+* @ifindex: ifindex of the net device
+* @flags: bit 0 - if set, redirect to ingress instead of egress
+* other bits - reserved
+* Return: TC_ACT_REDIRECT
+*/
+   BPF_FUNC_redirect,
__BPF_FUNC_MAX_ID,
 };
 
diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index 0a262a83f9d4..439873775d49 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -87,6 +87,7 @@ enum {
 #define TC_ACT_STOLEN  4
 #define TC_ACT_QUEUED  5
 #define TC_ACT_REPEAT  6
+#define TC_ACT_REDIRECT7
 #define TC_ACT_JUMP0x1000
 
 /* Action type identifiers*/
diff --git a/net/core/dev.c b/net/core/dev.c
index 877c84834d81..d6a492e57874 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3668,6 +3668,14 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb,
case TC_ACT_QUEUED:
kfree_skb(skb);
return NULL;
+   case TC_ACT_REDIRECT:
+   /* skb_mac_header check was done by cls/act_bpf, so
+* we can safely push the L2 header back before
+* redirecting to another netdev
+*/
+   __skb_push(skb, skb->mac_len);
+   skb_do_redirect(skb);
+   return NULL;
default:
break;
}
diff --git a/net/core/filter.c b/net/core/filter.c
index 971d6ba89758..da3f3d94d6e9 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1427,6 +1427,48 @@ const struct bpf_func_proto bpf_clone_redirect_proto = {
.arg3_type  = ARG_ANYTHING,
 };
 
+struct redirect_info {
+   u32 ifindex;
+   u32 flags;
+};
+
+static DEFINE_PER_CPU(struct redirect_info, redirect_info);
+static u64 bpf_redirect(u64 ifindex, u64 flags, u64 r3, u64 r4, u64 r5)
+{
+   struct redirect_info *ri = this_cpu_ptr(&redirect_info);

Re: [PATCH v2 net-next 2/2] bpf: add bpf_redirect() helper

2015-09-16 Thread John Fastabend
On 15-09-15 11:05 PM, Alexei Starovoitov wrote:
> Existing bpf_clone_redirect() helper clones skb before redirecting
> it to RX or TX of destination netdev.
> Introduce bpf_redirect() helper that does that without cloning.
> 
> Benchmarked with two hosts using 10G ixgbe NICs.
> One host is doing line rate pktgen.
> Another host is configured as:
> $ tc qdisc add dev $dev ingress
> $ tc filter add dev $dev root pref 10 u32 match u32 0 0 flowid 1:2 \
>action bpf run object-file tcbpf1_kern.o section clone_redirect_xmit drop
> so it receives the packet on $dev and immediately xmits it on $dev + 1
> The section 'clone_redirect_xmit' in tcbpf1_kern.o file has the program
> that does bpf_clone_redirect() and performance is 2.0 Mpps
> 
> $ tc filter add dev $dev root pref 10 u32 match u32 0 0 flowid 1:2 \
>action bpf run object-file tcbpf1_kern.o section redirect_xmit drop
> which is using bpf_redirect() - 2.4 Mpps
> 
> and using cls_bpf with integrated actions as:
> $ tc filter add dev $dev root pref 10 \
>   bpf run object-file tcbpf1_kern.o section redirect_xmit integ_act classid 1
> performance is 2.5 Mpps
> 
> To summarize:
> u32+act_bpf using clone_redirect - 2.0 Mpps
> u32+act_bpf using redirect - 2.4 Mpps
> cls_bpf using redirect - 2.5 Mpps
> 
> For comparison linux bridge in this setup is doing 2.1 Mpps
> and ixgbe rx + drop in ip_rcv - 7.8 Mpps
> 
> Signed-off-by: Alexei Starovoitov 
> Acked-by: Daniel Borkmann 
> ---
> This approach is using per_cpu scratch area to store ifindex and flags.
> The other alternatives discussed at plumbers are slower and more intrusive.
> v1->v2: dropped redundant iff_up check
> 
>  include/net/sch_generic.h|1 +
>  include/uapi/linux/bpf.h |8 
>  include/uapi/linux/pkt_cls.h |1 +
>  net/core/dev.c   |8 
>  net/core/filter.c|   44 ++
>  net/sched/act_bpf.c  |1 +
>  net/sched/cls_bpf.c  |1 +
>  samples/bpf/bpf_helpers.h|4 
>  samples/bpf/tcbpf1_kern.c|   24 ++-
>  9 files changed, 91 insertions(+), 1 deletion(-)
> 

Acked-by: John Fastabend 


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html