[PATCH bpf-next] selftests_bpf: extend test_tc_tunnel test with vxlan
From: Xuesen Huang Add BPF_F_ADJ_ROOM_ENCAP_L2_ETH flag to the existing tests which encapsulates the ethernet as the inner l2 header. Update a vxlan encapsulation test case. Signed-off-by: Xuesen Huang Signed-off-by: Li Wang Signed-off-by: Willem de Bruijn --- tools/testing/selftests/bpf/progs/test_tc_tunnel.c | 113 ++--- tools/testing/selftests/bpf/test_tc_tunnel.sh | 15 ++- 2 files changed, 111 insertions(+), 17 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c index 37bce7a..84cd632 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c +++ b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c @@ -24,14 +24,29 @@ static const int cfg_udp_src = 2; +#defineL2_PAD_SZ (sizeof(struct vxlanhdr) + ETH_HLEN) + #defineUDP_PORT #defineMPLS_OVER_UDP_PORT 6635 #defineETH_OVER_UDP_PORT +#defineVXLAN_UDP_PORT 8472 + +#defineEXTPROTO_VXLAN 0x1 + +#defineVXLAN_N_VID (1u << 24) +#defineVXLAN_VNI_MASK bpf_htonl((VXLAN_N_VID - 1) << 8) +#defineVXLAN_FLAGS 0x8 +#defineVXLAN_VNI 1 /* MPLS label 1000 with S bit (last label) set and ttl of 255. */ static const __u32 mpls_label = __bpf_constant_htonl(1000 << 12 | MPLS_LS_S_MASK | 0xff); +struct vxlanhdr { + __be32 vx_flags; + __be32 vx_vni; +} __attribute__((packed)); + struct gre_hdr { __be16 flags; __be16 protocol; @@ -45,13 +60,13 @@ struct gre_hdr { struct v4hdr { struct iphdr ip; union l4hdr l4hdr; - __u8 pad[16]; /* enough space for L2 header */ + __u8 pad[L2_PAD_SZ];/* space for L2 header / vxlan header ... */ } __attribute__((packed)); struct v6hdr { struct ipv6hdr ip; union l4hdr l4hdr; - __u8 pad[16]; /* enough space for L2 header */ + __u8 pad[L2_PAD_SZ];/* space for L2 header / vxlan header ... */ } __attribute__((packed)); static __always_inline void set_ipv4_csum(struct iphdr *iph) @@ -69,14 +84,15 @@ static __always_inline void set_ipv4_csum(struct iphdr *iph) iph->check = ~((csum & 0x) + (csum >> 16)); } -static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto, - __u16 l2_proto) +static __always_inline int __encap_ipv4(struct __sk_buff *skb, __u8 encap_proto, + __u16 l2_proto, __u16 ext_proto) { __u16 udp_dst = UDP_PORT; struct iphdr iph_inner; struct v4hdr h_outer; struct tcphdr tcph; int olen, l2_len; + __u8 *l2_hdr = NULL; int tcp_off; __u64 flags; @@ -141,7 +157,11 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto, break; case ETH_P_TEB: l2_len = ETH_HLEN; - udp_dst = ETH_OVER_UDP_PORT; + if (ext_proto & EXTPROTO_VXLAN) { + udp_dst = VXLAN_UDP_PORT; + l2_len += sizeof(struct vxlanhdr); + } else + udp_dst = ETH_OVER_UDP_PORT; break; } flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len); @@ -171,14 +191,26 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto, } /* add L2 encap (if specified) */ + l2_hdr = (__u8 *)_outer + olen; switch (l2_proto) { case ETH_P_MPLS_UC: - *((__u32 *)((__u8 *)_outer + olen)) = mpls_label; + *(__u32 *)l2_hdr = mpls_label; break; case ETH_P_TEB: - if (bpf_skb_load_bytes(skb, 0, (__u8 *)_outer + olen, - ETH_HLEN)) + flags |= BPF_F_ADJ_ROOM_ENCAP_L2_ETH; + + if (ext_proto & EXTPROTO_VXLAN) { + struct vxlanhdr *vxlan_hdr = (struct vxlanhdr *)l2_hdr; + + vxlan_hdr->vx_flags = VXLAN_FLAGS; + vxlan_hdr->vx_vni = bpf_htonl((VXLAN_VNI & VXLAN_VNI_MASK) << 8); + + l2_hdr += sizeof(struct vxlanhdr); + } + + if (bpf_skb_load_bytes(skb, 0, l2_hdr, ETH_HLEN)) return TC_ACT_SHOT; + break; } olen += l2_len; @@ -214,14 +246,21 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto, return TC_ACT_OK; } -static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto, +static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto, __u16 l2_proto) { + return __
Re: [PATCH] selftests_bpf: extend test_tc_tunnel test with vxlan
> 2021年3月4日 下午10:02,Willem de Bruijn 写道: > > On Thu, Mar 4, 2021 at 1:42 AM Xuesen Huang wrote: >> >> From: Xuesen Huang >> >> Add BPF_F_ADJ_ROOM_ENCAP_L2_ETH flag to the existing tests which >> encapsulates the ethernet as the inner l2 header. >> >> Update a vxlan encapsulation test case. >> >> Signed-off-by: Xuesen Huang >> Signed-off-by: Li Wang >> Signed-off-by: Willem de Bruijn > > Please mark patch target: [PATCH bpf-next] > Thanks. >> --- >> tools/testing/selftests/bpf/progs/test_tc_tunnel.c | 113 >> ++--- >> tools/testing/selftests/bpf/test_tc_tunnel.sh | 15 ++- >> 2 files changed, 111 insertions(+), 17 deletions(-) > > >> -static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 >> encap_proto, >> - __u16 l2_proto) >> +static __always_inline int __encap_ipv4(struct __sk_buff *skb, __u8 >> encap_proto, >> + __u16 l2_proto, __u16 ext_proto) >> { >>__u16 udp_dst = UDP_PORT; >>struct iphdr iph_inner; >>struct v4hdr h_outer; >>struct tcphdr tcph; >>int olen, l2_len; >> + __u8 *l2_hdr = NULL; >>int tcp_off; >>__u64 flags; >> >> @@ -141,7 +157,11 @@ static __always_inline int encap_ipv4(struct __sk_buff >> *skb, __u8 encap_proto, >>break; >>case ETH_P_TEB: >>l2_len = ETH_HLEN; >> - udp_dst = ETH_OVER_UDP_PORT; >> + if (ext_proto & EXTPROTO_VXLAN) { >> + udp_dst = VXLAN_UDP_PORT; >> + l2_len += sizeof(struct vxlanhdr); >> + } else >> + udp_dst = ETH_OVER_UDP_PORT; >>break; >>} >>flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len); >> @@ -171,14 +191,26 @@ static __always_inline int encap_ipv4(struct __sk_buff >> *skb, __u8 encap_proto, >>} >> >>/* add L2 encap (if specified) */ >> + l2_hdr = (__u8 *)_outer + olen; >>switch (l2_proto) { >>case ETH_P_MPLS_UC: >> - *((__u32 *)((__u8 *)_outer + olen)) = mpls_label; >> + *(__u32 *)l2_hdr = mpls_label; >>break; >>case ETH_P_TEB: >> - if (bpf_skb_load_bytes(skb, 0, (__u8 *)_outer + olen, >> - ETH_HLEN)) >> + flags |= BPF_F_ADJ_ROOM_ENCAP_L2_ETH; >> + >> + if (ext_proto & EXTPROTO_VXLAN) { >> + struct vxlanhdr *vxlan_hdr = (struct vxlanhdr >> *)l2_hdr; >> + >> + vxlan_hdr->vx_flags = VXLAN_FLAGS; >> + vxlan_hdr->vx_vni = bpf_htonl((VXLAN_VNI & >> VXLAN_VNI_MASK) << 8); >> + >> + l2_hdr += sizeof(struct vxlanhdr); > > should this be l2_len? (here and ipv6 below) > Should be l2_hdr. It’s a little tricky. l2_len has already been modified above. We use l2_hdr here to help us to find the address in h_outer to load original Ethernet header which is different in (eth) and (vxlan + eth). >> +SEC("encap_vxlan_eth") >> +int __encap_vxlan_eth(struct __sk_buff *skb) >> +{ >> + if (skb->protocol == __bpf_constant_htons(ETH_P_IP)) >> + return __encap_ipv4(skb, IPPROTO_UDP, >> + ETH_P_TEB, >> + EXTPROTO_VXLAN); > > non-standard indentation: align with the opening parenthesis. (here > and ipv6 below) Thanks.
[PATCH] selftests_bpf: extend test_tc_tunnel test with vxlan
From: Xuesen Huang Add BPF_F_ADJ_ROOM_ENCAP_L2_ETH flag to the existing tests which encapsulates the ethernet as the inner l2 header. Update a vxlan encapsulation test case. Signed-off-by: Xuesen Huang Signed-off-by: Li Wang Signed-off-by: Willem de Bruijn --- tools/testing/selftests/bpf/progs/test_tc_tunnel.c | 113 ++--- tools/testing/selftests/bpf/test_tc_tunnel.sh | 15 ++- 2 files changed, 111 insertions(+), 17 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c index 37bce7a..dbd18d0 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c +++ b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c @@ -24,14 +24,29 @@ static const int cfg_udp_src = 2; +#defineL2_PAD_SZ (sizeof(struct vxlanhdr) + ETH_HLEN) + #defineUDP_PORT #defineMPLS_OVER_UDP_PORT 6635 #defineETH_OVER_UDP_PORT +#defineVXLAN_UDP_PORT 8472 + +#defineEXTPROTO_VXLAN 0x1 + +#defineVXLAN_N_VID (1u << 24) +#defineVXLAN_VNI_MASK bpf_htonl((VXLAN_N_VID - 1) << 8) +#defineVXLAN_FLAGS 0x8 +#defineVXLAN_VNI 1 /* MPLS label 1000 with S bit (last label) set and ttl of 255. */ static const __u32 mpls_label = __bpf_constant_htonl(1000 << 12 | MPLS_LS_S_MASK | 0xff); +struct vxlanhdr { + __be32 vx_flags; + __be32 vx_vni; +} __attribute__((packed)); + struct gre_hdr { __be16 flags; __be16 protocol; @@ -45,13 +60,13 @@ struct gre_hdr { struct v4hdr { struct iphdr ip; union l4hdr l4hdr; - __u8 pad[16]; /* enough space for L2 header */ + __u8 pad[L2_PAD_SZ];/* space for L2 header / vxlan header ... */ } __attribute__((packed)); struct v6hdr { struct ipv6hdr ip; union l4hdr l4hdr; - __u8 pad[16]; /* enough space for L2 header */ + __u8 pad[L2_PAD_SZ];/* space for L2 header / vxlan header ... */ } __attribute__((packed)); static __always_inline void set_ipv4_csum(struct iphdr *iph) @@ -69,14 +84,15 @@ static __always_inline void set_ipv4_csum(struct iphdr *iph) iph->check = ~((csum & 0x) + (csum >> 16)); } -static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto, - __u16 l2_proto) +static __always_inline int __encap_ipv4(struct __sk_buff *skb, __u8 encap_proto, + __u16 l2_proto, __u16 ext_proto) { __u16 udp_dst = UDP_PORT; struct iphdr iph_inner; struct v4hdr h_outer; struct tcphdr tcph; int olen, l2_len; + __u8 *l2_hdr = NULL; int tcp_off; __u64 flags; @@ -141,7 +157,11 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto, break; case ETH_P_TEB: l2_len = ETH_HLEN; - udp_dst = ETH_OVER_UDP_PORT; + if (ext_proto & EXTPROTO_VXLAN) { + udp_dst = VXLAN_UDP_PORT; + l2_len += sizeof(struct vxlanhdr); + } else + udp_dst = ETH_OVER_UDP_PORT; break; } flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len); @@ -171,14 +191,26 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto, } /* add L2 encap (if specified) */ + l2_hdr = (__u8 *)_outer + olen; switch (l2_proto) { case ETH_P_MPLS_UC: - *((__u32 *)((__u8 *)_outer + olen)) = mpls_label; + *(__u32 *)l2_hdr = mpls_label; break; case ETH_P_TEB: - if (bpf_skb_load_bytes(skb, 0, (__u8 *)_outer + olen, - ETH_HLEN)) + flags |= BPF_F_ADJ_ROOM_ENCAP_L2_ETH; + + if (ext_proto & EXTPROTO_VXLAN) { + struct vxlanhdr *vxlan_hdr = (struct vxlanhdr *)l2_hdr; + + vxlan_hdr->vx_flags = VXLAN_FLAGS; + vxlan_hdr->vx_vni = bpf_htonl((VXLAN_VNI & VXLAN_VNI_MASK) << 8); + + l2_hdr += sizeof(struct vxlanhdr); + } + + if (bpf_skb_load_bytes(skb, 0, l2_hdr, ETH_HLEN)) return TC_ACT_SHOT; + break; } olen += l2_len; @@ -214,14 +246,21 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto, return TC_ACT_OK; } -static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto, +static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto, __u16 l2_proto) { + return __
[PATCH/v5] bpf: add bpf_skb_adjust_room flag BPF_F_ADJ_ROOM_ENCAP_L2_ETH
From: Xuesen Huang bpf_skb_adjust_room sets the inner_protocol as skb->protocol for packets encapsulation. But that is not appropriate when pushing Ethernet header. Add an option to further specify encap L2 type and set the inner_protocol as ETH_P_TEB. Suggested-by: Willem de Bruijn Signed-off-by: Xuesen Huang Signed-off-by: Zhiyong Cheng Signed-off-by: Li Wang --- include/uapi/linux/bpf.h | 5 + net/core/filter.c | 11 ++- tools/include/uapi/linux/bpf.h | 5 + 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 77d7c1b..d791596 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1751,6 +1751,10 @@ struct bpf_stack_build_id { * Use with ENCAP_L3/L4 flags to further specify the tunnel * type; *len* is the length of the inner MAC header. * + * * **BPF_F_ADJ_ROOM_ENCAP_L2_ETH**: + * Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the + * L2 type as Ethernet. + * * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be @@ -4088,6 +4092,7 @@ enum { BPF_F_ADJ_ROOM_ENCAP_L4_GRE = (1ULL << 3), BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4), BPF_F_ADJ_ROOM_NO_CSUM_RESET= (1ULL << 5), + BPF_F_ADJ_ROOM_ENCAP_L2_ETH = (1ULL << 6), }; enum { diff --git a/net/core/filter.c b/net/core/filter.c index 255aeee..8d1fb61 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3412,6 +3412,7 @@ static u32 bpf_skb_net_base_len(const struct sk_buff *skb) BPF_F_ADJ_ROOM_ENCAP_L3_MASK | \ BPF_F_ADJ_ROOM_ENCAP_L4_GRE | \ BPF_F_ADJ_ROOM_ENCAP_L4_UDP | \ +BPF_F_ADJ_ROOM_ENCAP_L2_ETH | \ BPF_F_ADJ_ROOM_ENCAP_L2( \ BPF_ADJ_ROOM_ENCAP_L2_MASK)) @@ -3448,6 +3449,10 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff, flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP) return -EINVAL; + if (flags & BPF_F_ADJ_ROOM_ENCAP_L2_ETH && + inner_mac_len < ETH_HLEN) + return -EINVAL; + if (skb->encapsulation) return -EALREADY; @@ -3466,7 +3471,11 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff, skb->inner_mac_header = inner_net - inner_mac_len; skb->inner_network_header = inner_net; skb->inner_transport_header = inner_trans; - skb_set_inner_protocol(skb, skb->protocol); + + if (flags & BPF_F_ADJ_ROOM_ENCAP_L2_ETH) + skb_set_inner_protocol(skb, htons(ETH_P_TEB)); + else + skb_set_inner_protocol(skb, skb->protocol); skb->encapsulation = 1; skb_set_network_header(skb, mac_len); diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 77d7c1b..d791596 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1751,6 +1751,10 @@ struct bpf_stack_build_id { * Use with ENCAP_L3/L4 flags to further specify the tunnel * type; *len* is the length of the inner MAC header. * + * * **BPF_F_ADJ_ROOM_ENCAP_L2_ETH**: + * Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the + * L2 type as Ethernet. + * * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be @@ -4088,6 +4092,7 @@ enum { BPF_F_ADJ_ROOM_ENCAP_L4_GRE = (1ULL << 3), BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4), BPF_F_ADJ_ROOM_NO_CSUM_RESET= (1ULL << 5), + BPF_F_ADJ_ROOM_ENCAP_L2_ETH = (1ULL << 6), }; enum { -- 1.8.3.1
Re: [PATCH/v4] bpf: add bpf_skb_adjust_room flag BPF_F_ADJ_ROOM_ENCAP_L2_ETH
> 2021年3月4日 上午2:53,Willem de Bruijn 写道: > > On Wed, Mar 3, 2021 at 7:33 AM Xuesen Huang wrote: >> >> From: Xuesen Huang >> >> bpf_skb_adjust_room sets the inner_protocol as skb->protocol for packets >> encapsulation. But that is not appropriate when pushing Ethernet header. >> >> Add an option to further specify encap L2 type and set the inner_protocol >> as ETH_P_TEB. >> >> Update test_tc_tunnel to verify adding vxlan encapsulation works with >> this flag. >> >> Suggested-by: Willem de Bruijn >> Signed-off-by: Xuesen Huang >> Signed-off-by: Zhiyong Cheng >> Signed-off-by: Li Wang > > Thanks for adding the test. Perhaps that is better in a separate patch? > > Overall looks great to me. > > The patch has not (yet?) arrived on patchwork. > Thanks Willem, I will separate it into two patch. I will send patch/v5 with only that new flag addition, lol. >> enum { >> diff --git a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c >> b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c >> index 37bce7a..6e144db 100644 >> --- a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c >> +++ b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c >> @@ -20,6 +20,14 @@ >> #include >> #include >> >> +#define encap_ipv4(...) __encap_ipv4(__VA_ARGS__, 0) >> + >> +#define encap_ipv4_with_ext_proto(...) __encap_ipv4(__VA_ARGS__) >> + >> +#define encap_ipv6(...) __encap_ipv6(__VA_ARGS__, 0) >> + >> +#define encap_ipv6_with_ext_proto(...) __encap_ipv6(__VA_ARGS__) >> + > > Instead of untyped macros, I'd define encap_ipv4 as a function that > calls __encap_ipv4. > > And no need for encap_ipv4_with_ext_proto equivalent to __encap_ipv4. > I defined these macros to try to keep the existing invocation for encap_ipv4/6 as the same, if we define this as a function all invocation should be modified? >> static const int cfg_port = 8000; >> >> static const int cfg_udp_src = 2; >> @@ -27,11 +35,24 @@ >> #defineUDP_PORT >> #defineMPLS_OVER_UDP_PORT 6635 >> #defineETH_OVER_UDP_PORT >> +#defineVXLAN_UDP_PORT 8472 >> + >> +#defineEXTPROTO_VXLAN 0x1 >> + >> +#defineVXLAN_N_VID (1u << 24) >> +#defineVXLAN_VNI_MASK bpf_htonl((VXLAN_N_VID - 1) << 8) >> +#defineVXLAN_FLAGS 0x8 >> +#defineVXLAN_VNI 1 >> >> /* MPLS label 1000 with S bit (last label) set and ttl of 255. */ >> static const __u32 mpls_label = __bpf_constant_htonl(1000 << 12 | >> MPLS_LS_S_MASK | 0xff); >> >> +struct vxlanhdr { >> + __be32 vx_flags; >> + __be32 vx_vni; >> +} __attribute__((packed)); >> + >> struct gre_hdr { >>__be16 flags; >>__be16 protocol; >> @@ -45,13 +66,13 @@ struct gre_hdr { >> struct v4hdr { >>struct iphdr ip; >>union l4hdr l4hdr; >> - __u8 pad[16]; /* enough space for L2 header */ >> + __u8 pad[24]; /* space for L2 header / vxlan >> header ... */ > > could we use something like sizeof(..) instead of a constant? > Thanks, I will try to fix this. >> @@ -171,14 +197,26 @@ static __always_inline int encap_ipv4(struct __sk_buff >> *skb, __u8 encap_proto, >>} >> >>/* add L2 encap (if specified) */ >> + l2_hdr = (__u8 *)_outer + olen; >>switch (l2_proto) { >>case ETH_P_MPLS_UC: >> - *((__u32 *)((__u8 *)_outer + olen)) = mpls_label; >> + *(__u32 *)l2_hdr = mpls_label; >>break; >>case ETH_P_TEB: >> - if (bpf_skb_load_bytes(skb, 0, (__u8 *)_outer + olen, >> - ETH_HLEN)) > > This is non-standard indentation? Here and elsewhere. I thinks it’s a previous issue. > >> @@ -249,7 +288,11 @@ static __always_inline int encap_ipv6(struct __sk_buff >> *skb, __u8 encap_proto, >>break; >>case ETH_P_TEB: >>l2_len = ETH_HLEN; >> - udp_dst = ETH_OVER_UDP_PORT; >> + if (ext_proto & EXTPROTO_VXLAN) { >> + udp_dst = VXLAN_UDP_PORT; >> + l2_len += sizeof(struct vxlanhdr); >> + } else >> + udp_dst = ETH_OVER_UDP_PORT; >>break;
Re: [PATCH/v4] bpf: add bpf_skb_adjust_room flag BPF_F_ADJ_ROOM_ENCAP_L2_ETH
Thanks Cong! Thanks to your suggestion, I try to add a simple test case to test_tc_tunnel. It works for me :) Thanks for your review. > 2021年3月3日 下午8:33,Xuesen Huang 写道: > > From: Xuesen Huang > > bpf_skb_adjust_room sets the inner_protocol as skb->protocol for packets > encapsulation. But that is not appropriate when pushing Ethernet header. > > Add an option to further specify encap L2 type and set the inner_protocol > as ETH_P_TEB. > > Update test_tc_tunnel to verify adding vxlan encapsulation works with > this flag. > > Suggested-by: Willem de Bruijn > Signed-off-by: Xuesen Huang > Signed-off-by: Zhiyong Cheng > Signed-off-by: Li Wang > --- > include/uapi/linux/bpf.h | 5 + > net/core/filter.c | 11 ++- > tools/include/uapi/linux/bpf.h | 5 + > tools/testing/selftests/bpf/progs/test_tc_tunnel.c | 107 ++--- > tools/testing/selftests/bpf/test_tc_tunnel.sh | 15 ++- > 5 files changed, 124 insertions(+), 19 deletions(-) > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > index 77d7c1b..d791596 100644 > --- a/include/uapi/linux/bpf.h > +++ b/include/uapi/linux/bpf.h > @@ -1751,6 +1751,10 @@ struct bpf_stack_build_id { > * Use with ENCAP_L3/L4 flags to further specify the tunnel > * type; *len* is the length of the inner MAC header. > * > + * * **BPF_F_ADJ_ROOM_ENCAP_L2_ETH**: > + * Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the > + * L2 type as Ethernet. > + * > *A call to this helper is susceptible to change the underlying > *packet buffer. Therefore, at load time, all checks on pointers > *previously done by the verifier are invalidated and must be > @@ -4088,6 +4092,7 @@ enum { > BPF_F_ADJ_ROOM_ENCAP_L4_GRE = (1ULL << 3), > BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4), > BPF_F_ADJ_ROOM_NO_CSUM_RESET= (1ULL << 5), > + BPF_F_ADJ_ROOM_ENCAP_L2_ETH = (1ULL << 6), > }; > > enum { > diff --git a/net/core/filter.c b/net/core/filter.c > index 255aeee..8d1fb61 100644 > --- a/net/core/filter.c > +++ b/net/core/filter.c > @@ -3412,6 +3412,7 @@ static u32 bpf_skb_net_base_len(const struct sk_buff > *skb) >BPF_F_ADJ_ROOM_ENCAP_L3_MASK | \ >BPF_F_ADJ_ROOM_ENCAP_L4_GRE | \ >BPF_F_ADJ_ROOM_ENCAP_L4_UDP | \ > + BPF_F_ADJ_ROOM_ENCAP_L2_ETH | \ >BPF_F_ADJ_ROOM_ENCAP_L2( \ > BPF_ADJ_ROOM_ENCAP_L2_MASK)) > > @@ -3448,6 +3449,10 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 > off, u32 len_diff, > flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP) > return -EINVAL; > > + if (flags & BPF_F_ADJ_ROOM_ENCAP_L2_ETH && > + inner_mac_len < ETH_HLEN) > + return -EINVAL; > + > if (skb->encapsulation) > return -EALREADY; > > @@ -3466,7 +3471,11 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 > off, u32 len_diff, > skb->inner_mac_header = inner_net - inner_mac_len; > skb->inner_network_header = inner_net; > skb->inner_transport_header = inner_trans; > - skb_set_inner_protocol(skb, skb->protocol); > + > + if (flags & BPF_F_ADJ_ROOM_ENCAP_L2_ETH) > + skb_set_inner_protocol(skb, htons(ETH_P_TEB)); > + else > + skb_set_inner_protocol(skb, skb->protocol); > > skb->encapsulation = 1; > skb_set_network_header(skb, mac_len); > diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h > index 77d7c1b..d791596 100644 > --- a/tools/include/uapi/linux/bpf.h > +++ b/tools/include/uapi/linux/bpf.h > @@ -1751,6 +1751,10 @@ struct bpf_stack_build_id { > * Use with ENCAP_L3/L4 flags to further specify the tunnel > * type; *len* is the length of the inner MAC header. > * > + * * **BPF_F_ADJ_ROOM_ENCAP_L2_ETH**: > + * Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the > + * L2 type as Ethernet. > + * > *A call to this helper is susceptible to change the underlying > *packet buffer. Therefore, at load time, all checks on pointers > *previously done by the veri
[PATCH/v4] bpf: add bpf_skb_adjust_room flag BPF_F_ADJ_ROOM_ENCAP_L2_ETH
From: Xuesen Huang bpf_skb_adjust_room sets the inner_protocol as skb->protocol for packets encapsulation. But that is not appropriate when pushing Ethernet header. Add an option to further specify encap L2 type and set the inner_protocol as ETH_P_TEB. Update test_tc_tunnel to verify adding vxlan encapsulation works with this flag. Suggested-by: Willem de Bruijn Signed-off-by: Xuesen Huang Signed-off-by: Zhiyong Cheng Signed-off-by: Li Wang --- include/uapi/linux/bpf.h | 5 + net/core/filter.c | 11 ++- tools/include/uapi/linux/bpf.h | 5 + tools/testing/selftests/bpf/progs/test_tc_tunnel.c | 107 ++--- tools/testing/selftests/bpf/test_tc_tunnel.sh | 15 ++- 5 files changed, 124 insertions(+), 19 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 77d7c1b..d791596 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1751,6 +1751,10 @@ struct bpf_stack_build_id { * Use with ENCAP_L3/L4 flags to further specify the tunnel * type; *len* is the length of the inner MAC header. * + * * **BPF_F_ADJ_ROOM_ENCAP_L2_ETH**: + * Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the + * L2 type as Ethernet. + * * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be @@ -4088,6 +4092,7 @@ enum { BPF_F_ADJ_ROOM_ENCAP_L4_GRE = (1ULL << 3), BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4), BPF_F_ADJ_ROOM_NO_CSUM_RESET= (1ULL << 5), + BPF_F_ADJ_ROOM_ENCAP_L2_ETH = (1ULL << 6), }; enum { diff --git a/net/core/filter.c b/net/core/filter.c index 255aeee..8d1fb61 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3412,6 +3412,7 @@ static u32 bpf_skb_net_base_len(const struct sk_buff *skb) BPF_F_ADJ_ROOM_ENCAP_L3_MASK | \ BPF_F_ADJ_ROOM_ENCAP_L4_GRE | \ BPF_F_ADJ_ROOM_ENCAP_L4_UDP | \ +BPF_F_ADJ_ROOM_ENCAP_L2_ETH | \ BPF_F_ADJ_ROOM_ENCAP_L2( \ BPF_ADJ_ROOM_ENCAP_L2_MASK)) @@ -3448,6 +3449,10 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff, flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP) return -EINVAL; + if (flags & BPF_F_ADJ_ROOM_ENCAP_L2_ETH && + inner_mac_len < ETH_HLEN) + return -EINVAL; + if (skb->encapsulation) return -EALREADY; @@ -3466,7 +3471,11 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff, skb->inner_mac_header = inner_net - inner_mac_len; skb->inner_network_header = inner_net; skb->inner_transport_header = inner_trans; - skb_set_inner_protocol(skb, skb->protocol); + + if (flags & BPF_F_ADJ_ROOM_ENCAP_L2_ETH) + skb_set_inner_protocol(skb, htons(ETH_P_TEB)); + else + skb_set_inner_protocol(skb, skb->protocol); skb->encapsulation = 1; skb_set_network_header(skb, mac_len); diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 77d7c1b..d791596 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1751,6 +1751,10 @@ struct bpf_stack_build_id { * Use with ENCAP_L3/L4 flags to further specify the tunnel * type; *len* is the length of the inner MAC header. * + * * **BPF_F_ADJ_ROOM_ENCAP_L2_ETH**: + * Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the + * L2 type as Ethernet. + * * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be @@ -4088,6 +4092,7 @@ enum { BPF_F_ADJ_ROOM_ENCAP_L4_GRE = (1ULL << 3), BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4), BPF_F_ADJ_ROOM_NO_CSUM_RESET= (1ULL << 5), + BPF_F_ADJ_ROOM_ENCAP_L2_ETH = (1ULL << 6), }; enum { diff --git a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c index 37bce7a..6e144db 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c +++ b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
[PATCH/v3] bpf: add bpf_skb_adjust_room flag BPF_F_ADJ_ROOM_ENCAP_L2_ETH
From: Xuesen Huang bpf_skb_adjust_room sets the inner_protocol as skb->protocol for packets encapsulation. But that is not appropriate when pushing Ethernet header. Add an option to further specify encap L2 type and set the inner_protocol as ETH_P_TEB. v3: - Fix the code format. v2: Suggested-by: Willem de Bruijn - Add a new flag to specify the type of the inner packet. Suggested-by: Willem de Bruijn Signed-off-by: Xuesen Huang Signed-off-by: Zhiyong Cheng Signed-off-by: Li Wang --- include/uapi/linux/bpf.h | 5 + net/core/filter.c | 11 ++- tools/include/uapi/linux/bpf.h | 5 + 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 77d7c1b..d791596 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1751,6 +1751,10 @@ struct bpf_stack_build_id { * Use with ENCAP_L3/L4 flags to further specify the tunnel * type; *len* is the length of the inner MAC header. * + * * **BPF_F_ADJ_ROOM_ENCAP_L2_ETH**: + * Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the + * L2 type as Ethernet. + * * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be @@ -4088,6 +4092,7 @@ enum { BPF_F_ADJ_ROOM_ENCAP_L4_GRE = (1ULL << 3), BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4), BPF_F_ADJ_ROOM_NO_CSUM_RESET= (1ULL << 5), + BPF_F_ADJ_ROOM_ENCAP_L2_ETH = (1ULL << 6), }; enum { diff --git a/net/core/filter.c b/net/core/filter.c index 255aeee..8d1fb61 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3412,6 +3412,7 @@ static u32 bpf_skb_net_base_len(const struct sk_buff *skb) BPF_F_ADJ_ROOM_ENCAP_L3_MASK | \ BPF_F_ADJ_ROOM_ENCAP_L4_GRE | \ BPF_F_ADJ_ROOM_ENCAP_L4_UDP | \ +BPF_F_ADJ_ROOM_ENCAP_L2_ETH | \ BPF_F_ADJ_ROOM_ENCAP_L2( \ BPF_ADJ_ROOM_ENCAP_L2_MASK)) @@ -3448,6 +3449,10 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff, flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP) return -EINVAL; + if (flags & BPF_F_ADJ_ROOM_ENCAP_L2_ETH && + inner_mac_len < ETH_HLEN) + return -EINVAL; + if (skb->encapsulation) return -EALREADY; @@ -3466,7 +3471,11 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff, skb->inner_mac_header = inner_net - inner_mac_len; skb->inner_network_header = inner_net; skb->inner_transport_header = inner_trans; - skb_set_inner_protocol(skb, skb->protocol); + + if (flags & BPF_F_ADJ_ROOM_ENCAP_L2_ETH) + skb_set_inner_protocol(skb, htons(ETH_P_TEB)); + else + skb_set_inner_protocol(skb, skb->protocol); skb->encapsulation = 1; skb_set_network_header(skb, mac_len); diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 77d7c1b..d791596 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1751,6 +1751,10 @@ struct bpf_stack_build_id { * Use with ENCAP_L3/L4 flags to further specify the tunnel * type; *len* is the length of the inner MAC header. * + * * **BPF_F_ADJ_ROOM_ENCAP_L2_ETH**: + * Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the + * L2 type as Ethernet. + * * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be @@ -4088,6 +4092,7 @@ enum { BPF_F_ADJ_ROOM_ENCAP_L4_GRE = (1ULL << 3), BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4), BPF_F_ADJ_ROOM_NO_CSUM_RESET= (1ULL << 5), + BPF_F_ADJ_ROOM_ENCAP_L2_ETH = (1ULL << 6), }; enum { -- 1.8.3.1
[PATCH/v2] bpf: add bpf_skb_adjust_room flag BPF_F_ADJ_ROOM_ENCAP_L2_ETH
From: Xuesen Huang bpf_skb_adjust_room sets the inner_protocol as skb->protocol for packets encapsulation. But that is not appropriate when pushing Ethernet header. Add an option to further specify encap L2 type and set the inner_protocol as ETH_P_TEB. Suggested-by: Willem de Bruijn Signed-off-by: Xuesen Huang Signed-off-by: Zhiyong Cheng Signed-off-by: Li Wang --- include/uapi/linux/bpf.h | 5 + net/core/filter.c | 11 ++- tools/include/uapi/linux/bpf.h | 5 + 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 77d7c1b..d791596 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1751,6 +1751,10 @@ struct bpf_stack_build_id { * Use with ENCAP_L3/L4 flags to further specify the tunnel * type; *len* is the length of the inner MAC header. * + * * **BPF_F_ADJ_ROOM_ENCAP_L2_ETH**: + * Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the + * L2 type as Ethernet. + * * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be @@ -4088,6 +4092,7 @@ enum { BPF_F_ADJ_ROOM_ENCAP_L4_GRE = (1ULL << 3), BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4), BPF_F_ADJ_ROOM_NO_CSUM_RESET= (1ULL << 5), + BPF_F_ADJ_ROOM_ENCAP_L2_ETH = (1ULL << 6), }; enum { diff --git a/net/core/filter.c b/net/core/filter.c index 255aeee..8d1fb61 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3412,6 +3412,7 @@ static u32 bpf_skb_net_base_len(const struct sk_buff *skb) BPF_F_ADJ_ROOM_ENCAP_L3_MASK | \ BPF_F_ADJ_ROOM_ENCAP_L4_GRE | \ BPF_F_ADJ_ROOM_ENCAP_L4_UDP | \ +BPF_F_ADJ_ROOM_ENCAP_L2_ETH | \ BPF_F_ADJ_ROOM_ENCAP_L2( \ BPF_ADJ_ROOM_ENCAP_L2_MASK)) @@ -3448,6 +3449,10 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff, flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP) return -EINVAL; + if (flags & BPF_F_ADJ_ROOM_ENCAP_L2_ETH && + inner_mac_len < ETH_HLEN) + return -EINVAL; + if (skb->encapsulation) return -EALREADY; @@ -3466,7 +3471,11 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff, skb->inner_mac_header = inner_net - inner_mac_len; skb->inner_network_header = inner_net; skb->inner_transport_header = inner_trans; - skb_set_inner_protocol(skb, skb->protocol); + + if (flags & BPF_F_ADJ_ROOM_ENCAP_L2_ETH) + skb_set_inner_protocol(skb, htons(ETH_P_TEB)); + else + skb_set_inner_protocol(skb, skb->protocol); skb->encapsulation = 1; skb_set_network_header(skb, mac_len); diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 77d7c1b..d791596 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1751,6 +1751,10 @@ struct bpf_stack_build_id { * Use with ENCAP_L3/L4 flags to further specify the tunnel * type; *len* is the length of the inner MAC header. * + * * **BPF_F_ADJ_ROOM_ENCAP_L2_ETH**: + * Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the + * L2 type as Ethernet. + * * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be @@ -4088,6 +4092,7 @@ enum { BPF_F_ADJ_ROOM_ENCAP_L4_GRE = (1ULL << 3), BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4), BPF_F_ADJ_ROOM_NO_CSUM_RESET= (1ULL << 5), + BPF_F_ADJ_ROOM_ENCAP_L2_ETH = (1ULL << 6), }; enum { -- 1.8.3.1