[PATCH bpf-next] selftests_bpf: extend test_tc_tunnel test with vxlan

2021-03-05 Thread Xuesen Huang
From: Xuesen Huang 

Add the BPF_F_ADJ_ROOM_ENCAP_L2_ETH flag to the existing tests that
encapsulate an Ethernet header as the inner L2 header.

Add a VXLAN encapsulation test case.

Signed-off-by: Xuesen Huang 
Signed-off-by: Li Wang 
Signed-off-by: Willem de Bruijn 
---
 tools/testing/selftests/bpf/progs/test_tc_tunnel.c | 113 ++---
 tools/testing/selftests/bpf/test_tc_tunnel.sh  |  15 ++-
 2 files changed, 111 insertions(+), 17 deletions(-)

diff --git a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c 
b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
index 37bce7a..84cd632 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
@@ -24,14 +24,29 @@
 
 static const int cfg_udp_src = 20000;
 
+#define L2_PAD_SZ   (sizeof(struct vxlanhdr) + ETH_HLEN)
+
 #define UDP_PORT            5555
 #define MPLS_OVER_UDP_PORT  6635
 #define ETH_OVER_UDP_PORT   7777
+#define VXLAN_UDP_PORT  8472
+
+#define EXTPROTO_VXLAN  0x1
+
+#define VXLAN_N_VID (1u << 24)
+#define VXLAN_VNI_MASK  bpf_htonl((VXLAN_N_VID - 1) << 8)
+#define VXLAN_FLAGS 0x8
+#define VXLAN_VNI   1
 
 /* MPLS label 1000 with S bit (last label) set and ttl of 255. */
 static const __u32 mpls_label = __bpf_constant_htonl(1000 << 12 |
 MPLS_LS_S_MASK | 0xff);
 
+struct vxlanhdr {
+   __be32 vx_flags;
+   __be32 vx_vni;
+} __attribute__((packed));
+
 struct gre_hdr {
__be16 flags;
__be16 protocol;
@@ -45,13 +60,13 @@ struct gre_hdr {
 struct v4hdr {
struct iphdr ip;
union l4hdr l4hdr;
-   __u8 pad[16];   /* enough space for L2 header */
+   __u8 pad[L2_PAD_SZ];/* space for L2 header / vxlan header 
... */
 } __attribute__((packed));
 
 struct v6hdr {
struct ipv6hdr ip;
union l4hdr l4hdr;
-   __u8 pad[16];   /* enough space for L2 header */
+   __u8 pad[L2_PAD_SZ];/* space for L2 header / vxlan header 
... */
 } __attribute__((packed));
 
 static __always_inline void set_ipv4_csum(struct iphdr *iph)
@@ -69,14 +84,15 @@ static __always_inline void set_ipv4_csum(struct iphdr *iph)
iph->check = ~((csum & 0xffff) + (csum >> 16));
 }
 
-static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
- __u16 l2_proto)
+static __always_inline int __encap_ipv4(struct __sk_buff *skb, __u8 
encap_proto,
+   __u16 l2_proto, __u16 ext_proto)
 {
__u16 udp_dst = UDP_PORT;
struct iphdr iph_inner;
struct v4hdr h_outer;
struct tcphdr tcph;
int olen, l2_len;
+   __u8 *l2_hdr = NULL;
int tcp_off;
__u64 flags;
 
@@ -141,7 +157,11 @@ static __always_inline int encap_ipv4(struct __sk_buff 
*skb, __u8 encap_proto,
break;
case ETH_P_TEB:
l2_len = ETH_HLEN;
-   udp_dst = ETH_OVER_UDP_PORT;
+   if (ext_proto & EXTPROTO_VXLAN) {
+   udp_dst = VXLAN_UDP_PORT;
+   l2_len += sizeof(struct vxlanhdr);
+   } else
+   udp_dst = ETH_OVER_UDP_PORT;
break;
}
flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);
@@ -171,14 +191,26 @@ static __always_inline int encap_ipv4(struct __sk_buff 
*skb, __u8 encap_proto,
}
 
/* add L2 encap (if specified) */
+   l2_hdr = (__u8 *)&h_outer + olen;
switch (l2_proto) {
case ETH_P_MPLS_UC:
-   *((__u32 *)((__u8 *)&h_outer + olen)) = mpls_label;
+   *(__u32 *)l2_hdr = mpls_label;
break;
case ETH_P_TEB:
-   if (bpf_skb_load_bytes(skb, 0, (__u8 *)&h_outer + olen,
-  ETH_HLEN))
+   flags |= BPF_F_ADJ_ROOM_ENCAP_L2_ETH;
+
+   if (ext_proto & EXTPROTO_VXLAN) {
+   struct vxlanhdr *vxlan_hdr = (struct vxlanhdr *)l2_hdr;
+
+   vxlan_hdr->vx_flags = VXLAN_FLAGS;
+   vxlan_hdr->vx_vni = bpf_htonl((VXLAN_VNI & 
VXLAN_VNI_MASK) << 8);
+
+   l2_hdr += sizeof(struct vxlanhdr);
+   }
+
+   if (bpf_skb_load_bytes(skb, 0, l2_hdr, ETH_HLEN))
return TC_ACT_SHOT;
+
break;
}
olen += l2_len;
@@ -214,14 +246,21 @@ static __always_inline int encap_ipv4(struct __sk_buff 
*skb, __u8 encap_proto,
return TC_ACT_OK;
 }
 
-static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
+static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
  __u16 l2_proto)
 {
+   return __

Re: [PATCH] selftests_bpf: extend test_tc_tunnel test with vxlan

2021-03-05 Thread Xuesen Huang



> 2021年3月4日 下午10:02,Willem de Bruijn  写道:
> 
> On Thu, Mar 4, 2021 at 1:42 AM Xuesen Huang  wrote:
>> 
>> From: Xuesen Huang 
>> 
>> Add BPF_F_ADJ_ROOM_ENCAP_L2_ETH flag to the existing tests which
>> encapsulates the ethernet as the inner l2 header.
>> 
>> Update a vxlan encapsulation test case.
>> 
>> Signed-off-by: Xuesen Huang 
>> Signed-off-by: Li Wang 
>> Signed-off-by: Willem de Bruijn 
> 
> Please mark patch target: [PATCH bpf-next]
> 
Thanks.

>> ---
>> tools/testing/selftests/bpf/progs/test_tc_tunnel.c | 113 
>> ++---
>> tools/testing/selftests/bpf/test_tc_tunnel.sh  |  15 ++-
>> 2 files changed, 111 insertions(+), 17 deletions(-)
> 
> 
>> -static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 
>> encap_proto,
>> - __u16 l2_proto)
>> +static __always_inline int __encap_ipv4(struct __sk_buff *skb, __u8 
>> encap_proto,
>> +   __u16 l2_proto, __u16 ext_proto)
>> {
>>__u16 udp_dst = UDP_PORT;
>>struct iphdr iph_inner;
>>struct v4hdr h_outer;
>>struct tcphdr tcph;
>>int olen, l2_len;
>> +   __u8 *l2_hdr = NULL;
>>int tcp_off;
>>__u64 flags;
>> 
>> @@ -141,7 +157,11 @@ static __always_inline int encap_ipv4(struct __sk_buff 
>> *skb, __u8 encap_proto,
>>break;
>>case ETH_P_TEB:
>>l2_len = ETH_HLEN;
>> -   udp_dst = ETH_OVER_UDP_PORT;
>> +   if (ext_proto & EXTPROTO_VXLAN) {
>> +   udp_dst = VXLAN_UDP_PORT;
>> +   l2_len += sizeof(struct vxlanhdr);
>> +   } else
>> +   udp_dst = ETH_OVER_UDP_PORT;
>>break;
>>}
>>flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);
>> @@ -171,14 +191,26 @@ static __always_inline int encap_ipv4(struct __sk_buff 
>> *skb, __u8 encap_proto,
>>}
>> 
>>/* add L2 encap (if specified) */
>> +   l2_hdr = (__u8 *)_outer + olen;
>>switch (l2_proto) {
>>case ETH_P_MPLS_UC:
>> -   *((__u32 *)((__u8 *)_outer + olen)) = mpls_label;
>> +   *(__u32 *)l2_hdr = mpls_label;
>>break;
>>case ETH_P_TEB:
>> -   if (bpf_skb_load_bytes(skb, 0, (__u8 *)_outer + olen,
>> -  ETH_HLEN))
>> +   flags |= BPF_F_ADJ_ROOM_ENCAP_L2_ETH;
>> +
>> +   if (ext_proto & EXTPROTO_VXLAN) {
>> +   struct vxlanhdr *vxlan_hdr = (struct vxlanhdr 
>> *)l2_hdr;
>> +
>> +   vxlan_hdr->vx_flags = VXLAN_FLAGS;
>> +   vxlan_hdr->vx_vni = bpf_htonl((VXLAN_VNI & 
>> VXLAN_VNI_MASK) << 8);
>> +
>> +   l2_hdr += sizeof(struct vxlanhdr);
> 
> should this be l2_len? (here and ipv6 below)
> 
Should be l2_hdr. 

It's a little tricky. l2_len has already been updated above. Here we use
l2_hdr to find the address within h_outer where the original Ethernet header
must be loaded; that address differs between the (eth) and (vxlan + eth) cases.

>> +SEC("encap_vxlan_eth")
>> +int __encap_vxlan_eth(struct __sk_buff *skb)
>> +{
>> +   if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
>> +   return __encap_ipv4(skb, IPPROTO_UDP,
>> +   ETH_P_TEB,
>> +   EXTPROTO_VXLAN);
> 
> non-standard indentation: align with the opening parenthesis. (here
> and ipv6 below)
Thanks.



[PATCH] selftests_bpf: extend test_tc_tunnel test with vxlan

2021-03-03 Thread Xuesen Huang
From: Xuesen Huang 

Add the BPF_F_ADJ_ROOM_ENCAP_L2_ETH flag to the existing tests that
encapsulate an Ethernet header as the inner L2 header.

Add a VXLAN encapsulation test case.

Signed-off-by: Xuesen Huang 
Signed-off-by: Li Wang 
Signed-off-by: Willem de Bruijn 
---
 tools/testing/selftests/bpf/progs/test_tc_tunnel.c | 113 ++---
 tools/testing/selftests/bpf/test_tc_tunnel.sh  |  15 ++-
 2 files changed, 111 insertions(+), 17 deletions(-)

diff --git a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c 
b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
index 37bce7a..dbd18d0 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
@@ -24,14 +24,29 @@
 
 static const int cfg_udp_src = 20000;
 
+#define L2_PAD_SZ   (sizeof(struct vxlanhdr) + ETH_HLEN)
+
 #define UDP_PORT            5555
 #define MPLS_OVER_UDP_PORT  6635
 #define ETH_OVER_UDP_PORT   7777
+#define VXLAN_UDP_PORT  8472
+
+#define EXTPROTO_VXLAN  0x1
+
+#define VXLAN_N_VID (1u << 24)
+#define VXLAN_VNI_MASK  bpf_htonl((VXLAN_N_VID - 1) << 8)
+#define VXLAN_FLAGS 0x8
+#define VXLAN_VNI   1
 
 /* MPLS label 1000 with S bit (last label) set and ttl of 255. */
 static const __u32 mpls_label = __bpf_constant_htonl(1000 << 12 |
 MPLS_LS_S_MASK | 0xff);
 
+struct vxlanhdr {
+   __be32 vx_flags;
+   __be32 vx_vni;
+} __attribute__((packed));
+
 struct gre_hdr {
__be16 flags;
__be16 protocol;
@@ -45,13 +60,13 @@ struct gre_hdr {
 struct v4hdr {
struct iphdr ip;
union l4hdr l4hdr;
-   __u8 pad[16];   /* enough space for L2 header */
+   __u8 pad[L2_PAD_SZ];/* space for L2 header / vxlan header 
... */
 } __attribute__((packed));
 
 struct v6hdr {
struct ipv6hdr ip;
union l4hdr l4hdr;
-   __u8 pad[16];   /* enough space for L2 header */
+   __u8 pad[L2_PAD_SZ];/* space for L2 header / vxlan header 
... */
 } __attribute__((packed));
 
 static __always_inline void set_ipv4_csum(struct iphdr *iph)
@@ -69,14 +84,15 @@ static __always_inline void set_ipv4_csum(struct iphdr *iph)
iph->check = ~((csum & 0xffff) + (csum >> 16));
 }
 
-static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
- __u16 l2_proto)
+static __always_inline int __encap_ipv4(struct __sk_buff *skb, __u8 
encap_proto,
+   __u16 l2_proto, __u16 ext_proto)
 {
__u16 udp_dst = UDP_PORT;
struct iphdr iph_inner;
struct v4hdr h_outer;
struct tcphdr tcph;
int olen, l2_len;
+   __u8 *l2_hdr = NULL;
int tcp_off;
__u64 flags;
 
@@ -141,7 +157,11 @@ static __always_inline int encap_ipv4(struct __sk_buff 
*skb, __u8 encap_proto,
break;
case ETH_P_TEB:
l2_len = ETH_HLEN;
-   udp_dst = ETH_OVER_UDP_PORT;
+   if (ext_proto & EXTPROTO_VXLAN) {
+   udp_dst = VXLAN_UDP_PORT;
+   l2_len += sizeof(struct vxlanhdr);
+   } else
+   udp_dst = ETH_OVER_UDP_PORT;
break;
}
flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);
@@ -171,14 +191,26 @@ static __always_inline int encap_ipv4(struct __sk_buff 
*skb, __u8 encap_proto,
}
 
/* add L2 encap (if specified) */
+   l2_hdr = (__u8 *)&h_outer + olen;
switch (l2_proto) {
case ETH_P_MPLS_UC:
-   *((__u32 *)((__u8 *)&h_outer + olen)) = mpls_label;
+   *(__u32 *)l2_hdr = mpls_label;
break;
case ETH_P_TEB:
-   if (bpf_skb_load_bytes(skb, 0, (__u8 *)&h_outer + olen,
-  ETH_HLEN))
+   flags |= BPF_F_ADJ_ROOM_ENCAP_L2_ETH;
+
+   if (ext_proto & EXTPROTO_VXLAN) {
+   struct vxlanhdr *vxlan_hdr = (struct vxlanhdr *)l2_hdr;
+
+   vxlan_hdr->vx_flags = VXLAN_FLAGS;
+   vxlan_hdr->vx_vni = bpf_htonl((VXLAN_VNI & 
VXLAN_VNI_MASK) << 8);
+
+   l2_hdr += sizeof(struct vxlanhdr);
+   }
+
+   if (bpf_skb_load_bytes(skb, 0, l2_hdr, ETH_HLEN))
return TC_ACT_SHOT;
+
break;
}
olen += l2_len;
@@ -214,14 +246,21 @@ static __always_inline int encap_ipv4(struct __sk_buff 
*skb, __u8 encap_proto,
return TC_ACT_OK;
 }
 
-static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
+static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
  __u16 l2_proto)
 {
+   return __

[PATCH/v5] bpf: add bpf_skb_adjust_room flag BPF_F_ADJ_ROOM_ENCAP_L2_ETH

2021-03-03 Thread Xuesen Huang
From: Xuesen Huang 

bpf_skb_adjust_room sets inner_protocol to skb->protocol when encapsulating
packets, but that is not appropriate when pushing an Ethernet header.

Add an option to further specify the encap L2 type and set inner_protocol
to ETH_P_TEB.

Suggested-by: Willem de Bruijn 
Signed-off-by: Xuesen Huang 
Signed-off-by: Zhiyong Cheng 
Signed-off-by: Li Wang 
---
 include/uapi/linux/bpf.h   |  5 +
 net/core/filter.c  | 11 ++-
 tools/include/uapi/linux/bpf.h |  5 +
 3 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 77d7c1b..d791596 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1751,6 +1751,10 @@ struct bpf_stack_build_id {
  *   Use with ENCAP_L3/L4 flags to further specify the tunnel
  *   type; *len* is the length of the inner MAC header.
  *
+ * * **BPF_F_ADJ_ROOM_ENCAP_L2_ETH**:
+ *   Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the
+ *   L2 type as Ethernet.
+ *
  * A call to this helper is susceptible to change the underlying
  * packet buffer. Therefore, at load time, all checks on pointers
  * previously done by the verifier are invalidated and must be
@@ -4088,6 +4092,7 @@ enum {
BPF_F_ADJ_ROOM_ENCAP_L4_GRE = (1ULL << 3),
BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4),
BPF_F_ADJ_ROOM_NO_CSUM_RESET= (1ULL << 5),
+   BPF_F_ADJ_ROOM_ENCAP_L2_ETH = (1ULL << 6),
 };
 
 enum {
diff --git a/net/core/filter.c b/net/core/filter.c
index 255aeee..8d1fb61 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3412,6 +3412,7 @@ static u32 bpf_skb_net_base_len(const struct sk_buff *skb)
 BPF_F_ADJ_ROOM_ENCAP_L3_MASK | \
 BPF_F_ADJ_ROOM_ENCAP_L4_GRE | \
 BPF_F_ADJ_ROOM_ENCAP_L4_UDP | \
+BPF_F_ADJ_ROOM_ENCAP_L2_ETH | \
 BPF_F_ADJ_ROOM_ENCAP_L2( \
  BPF_ADJ_ROOM_ENCAP_L2_MASK))
 
@@ -3448,6 +3449,10 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 
off, u32 len_diff,
flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP)
return -EINVAL;
 
+   if (flags & BPF_F_ADJ_ROOM_ENCAP_L2_ETH &&
+   inner_mac_len < ETH_HLEN)
+   return -EINVAL;
+
if (skb->encapsulation)
return -EALREADY;
 
@@ -3466,7 +3471,11 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 
off, u32 len_diff,
skb->inner_mac_header = inner_net - inner_mac_len;
skb->inner_network_header = inner_net;
skb->inner_transport_header = inner_trans;
-   skb_set_inner_protocol(skb, skb->protocol);
+
+   if (flags & BPF_F_ADJ_ROOM_ENCAP_L2_ETH)
+   skb_set_inner_protocol(skb, htons(ETH_P_TEB));
+   else
+   skb_set_inner_protocol(skb, skb->protocol);
 
skb->encapsulation = 1;
skb_set_network_header(skb, mac_len);
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 77d7c1b..d791596 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1751,6 +1751,10 @@ struct bpf_stack_build_id {
  *   Use with ENCAP_L3/L4 flags to further specify the tunnel
  *   type; *len* is the length of the inner MAC header.
  *
+ * * **BPF_F_ADJ_ROOM_ENCAP_L2_ETH**:
+ *   Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the
+ *   L2 type as Ethernet.
+ *
  * A call to this helper is susceptible to change the underlying
  * packet buffer. Therefore, at load time, all checks on pointers
  * previously done by the verifier are invalidated and must be
@@ -4088,6 +4092,7 @@ enum {
BPF_F_ADJ_ROOM_ENCAP_L4_GRE = (1ULL << 3),
BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4),
BPF_F_ADJ_ROOM_NO_CSUM_RESET= (1ULL << 5),
+   BPF_F_ADJ_ROOM_ENCAP_L2_ETH = (1ULL << 6),
 };
 
 enum {
-- 
1.8.3.1



Re: [PATCH/v4] bpf: add bpf_skb_adjust_room flag BPF_F_ADJ_ROOM_ENCAP_L2_ETH

2021-03-03 Thread Xuesen Huang



> 2021年3月4日 上午2:53,Willem de Bruijn  写道:
> 
> On Wed, Mar 3, 2021 at 7:33 AM Xuesen Huang  wrote:
>> 
>> From: Xuesen Huang 
>> 
>> bpf_skb_adjust_room sets the inner_protocol as skb->protocol for packets
>> encapsulation. But that is not appropriate when pushing Ethernet header.
>> 
>> Add an option to further specify encap L2 type and set the inner_protocol
>> as ETH_P_TEB.
>> 
>> Update test_tc_tunnel to verify adding vxlan encapsulation works with
>> this flag.
>> 
>> Suggested-by: Willem de Bruijn 
>> Signed-off-by: Xuesen Huang 
>> Signed-off-by: Zhiyong Cheng 
>> Signed-off-by: Li Wang 
> 
> Thanks for adding the test. Perhaps that is better in a separate patch?
> 
> Overall looks great to me.
> 
> The patch has not (yet?) arrived on patchwork.
> 
Thanks Willem, I will separate it into two patches.

I will send patch/v5 with only that new flag addition, lol.

>> enum {
>> diff --git a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c 
>> b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
>> index 37bce7a..6e144db 100644
>> --- a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
>> +++ b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
>> @@ -20,6 +20,14 @@
>> #include 
>> #include 
>> 
>> +#define encap_ipv4(...) __encap_ipv4(__VA_ARGS__, 0)
>> +
>> +#define encap_ipv4_with_ext_proto(...) __encap_ipv4(__VA_ARGS__)
>> +
>> +#define encap_ipv6(...) __encap_ipv6(__VA_ARGS__, 0)
>> +
>> +#define encap_ipv6_with_ext_proto(...) __encap_ipv6(__VA_ARGS__)
>> +
> 
> Instead of untyped macros, I'd define encap_ipv4 as a function that
> calls __encap_ipv4.
> 
> And no need for encap_ipv4_with_ext_proto equivalent to __encap_ipv4.
> 
I defined these macros to keep the existing invocations of encap_ipv4/6
unchanged. If we define this as a function instead, wouldn't all of the
invocations need to be modified?

>> static const int cfg_port = 8000;
>> 
>> static const int cfg_udp_src = 2;
>> @@ -27,11 +35,24 @@
>> #defineUDP_PORT
>> #defineMPLS_OVER_UDP_PORT  6635
>> #defineETH_OVER_UDP_PORT   
>> +#defineVXLAN_UDP_PORT  8472
>> +
>> +#defineEXTPROTO_VXLAN  0x1
>> +
>> +#defineVXLAN_N_VID (1u << 24)
>> +#defineVXLAN_VNI_MASK  bpf_htonl((VXLAN_N_VID - 1) << 8)
>> +#defineVXLAN_FLAGS 0x8
>> +#defineVXLAN_VNI   1
>> 
>> /* MPLS label 1000 with S bit (last label) set and ttl of 255. */
>> static const __u32 mpls_label = __bpf_constant_htonl(1000 << 12 |
>> MPLS_LS_S_MASK | 0xff);
>> 
>> +struct vxlanhdr {
>> +   __be32 vx_flags;
>> +   __be32 vx_vni;
>> +} __attribute__((packed));
>> +
>> struct gre_hdr {
>>__be16 flags;
>>__be16 protocol;
>> @@ -45,13 +66,13 @@ struct gre_hdr {
>> struct v4hdr {
>>struct iphdr ip;
>>union l4hdr l4hdr;
>> -   __u8 pad[16];   /* enough space for L2 header */
>> +   __u8 pad[24];   /* space for L2 header / vxlan 
>> header ... */
> 
> could we use something like sizeof(..) instead of a constant?
> 
Thanks, I will try to fix this.

>> @@ -171,14 +197,26 @@ static __always_inline int encap_ipv4(struct __sk_buff 
>> *skb, __u8 encap_proto,
>>}
>> 
>>/* add L2 encap (if specified) */
>> +   l2_hdr = (__u8 *)_outer + olen;
>>switch (l2_proto) {
>>case ETH_P_MPLS_UC:
>> -   *((__u32 *)((__u8 *)_outer + olen)) = mpls_label;
>> +   *(__u32 *)l2_hdr = mpls_label;
>>break;
>>case ETH_P_TEB:
>> -   if (bpf_skb_load_bytes(skb, 0, (__u8 *)_outer + olen,
>> -  ETH_HLEN))
> 
> This is non-standard indentation? Here and elsewhere.
I think it's a pre-existing issue.

> 
>> @@ -249,7 +288,11 @@ static __always_inline int encap_ipv6(struct __sk_buff 
>> *skb, __u8 encap_proto,
>>break;
>>case ETH_P_TEB:
>>l2_len = ETH_HLEN;
>> -   udp_dst = ETH_OVER_UDP_PORT;
>> +   if (ext_proto & EXTPROTO_VXLAN) {
>> +   udp_dst = VXLAN_UDP_PORT;
>> +   l2_len += sizeof(struct vxlanhdr);
>> +   } else
>> +   udp_dst = ETH_OVER_UDP_PORT;
>>break;

Re: [PATCH/v4] bpf: add bpf_skb_adjust_room flag BPF_F_ADJ_ROOM_ENCAP_L2_ETH

2021-03-03 Thread Xuesen Huang
Thanks Cong!

Following your suggestion, I added a simple test case to test_tc_tunnel.
It works for me :)

Thanks for your review.

> 2021年3月3日 下午8:33,Xuesen Huang  写道:
> 
> From: Xuesen Huang 
> 
> bpf_skb_adjust_room sets the inner_protocol as skb->protocol for packets
> encapsulation. But that is not appropriate when pushing Ethernet header.
> 
> Add an option to further specify encap L2 type and set the inner_protocol
> as ETH_P_TEB.
> 
> Update test_tc_tunnel to verify adding vxlan encapsulation works with
> this flag.
> 
> Suggested-by: Willem de Bruijn 
> Signed-off-by: Xuesen Huang 
> Signed-off-by: Zhiyong Cheng 
> Signed-off-by: Li Wang 
> ---
> include/uapi/linux/bpf.h   |   5 +
> net/core/filter.c  |  11 ++-
> tools/include/uapi/linux/bpf.h |   5 +
> tools/testing/selftests/bpf/progs/test_tc_tunnel.c | 107 ++---
> tools/testing/selftests/bpf/test_tc_tunnel.sh  |  15 ++-
> 5 files changed, 124 insertions(+), 19 deletions(-)
> 
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 77d7c1b..d791596 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -1751,6 +1751,10 @@ struct bpf_stack_build_id {
>  *  Use with ENCAP_L3/L4 flags to further specify the tunnel
>  *  type; *len* is the length of the inner MAC header.
>  *
> + *   * **BPF_F_ADJ_ROOM_ENCAP_L2_ETH**:
> + * Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the
> + * L2 type as Ethernet.
> + *
>  *A call to this helper is susceptible to change the underlying
>  *packet buffer. Therefore, at load time, all checks on pointers
>  *previously done by the verifier are invalidated and must be
> @@ -4088,6 +4092,7 @@ enum {
>   BPF_F_ADJ_ROOM_ENCAP_L4_GRE = (1ULL << 3),
>   BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4),
>   BPF_F_ADJ_ROOM_NO_CSUM_RESET= (1ULL << 5),
> + BPF_F_ADJ_ROOM_ENCAP_L2_ETH = (1ULL << 6),
> };
> 
> enum {
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 255aeee..8d1fb61 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -3412,6 +3412,7 @@ static u32 bpf_skb_net_base_len(const struct sk_buff 
> *skb)
>BPF_F_ADJ_ROOM_ENCAP_L3_MASK | \
>BPF_F_ADJ_ROOM_ENCAP_L4_GRE | \
>BPF_F_ADJ_ROOM_ENCAP_L4_UDP | \
> +  BPF_F_ADJ_ROOM_ENCAP_L2_ETH | \
>BPF_F_ADJ_ROOM_ENCAP_L2( \
> BPF_ADJ_ROOM_ENCAP_L2_MASK))
> 
> @@ -3448,6 +3449,10 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 
> off, u32 len_diff,
>   flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP)
>   return -EINVAL;
> 
> + if (flags & BPF_F_ADJ_ROOM_ENCAP_L2_ETH &&
> + inner_mac_len < ETH_HLEN)
> + return -EINVAL;
> +
>   if (skb->encapsulation)
>   return -EALREADY;
> 
> @@ -3466,7 +3471,11 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 
> off, u32 len_diff,
>   skb->inner_mac_header = inner_net - inner_mac_len;
>   skb->inner_network_header = inner_net;
>   skb->inner_transport_header = inner_trans;
> - skb_set_inner_protocol(skb, skb->protocol);
> +
> + if (flags & BPF_F_ADJ_ROOM_ENCAP_L2_ETH)
> + skb_set_inner_protocol(skb, htons(ETH_P_TEB));
> + else
> + skb_set_inner_protocol(skb, skb->protocol);
> 
>   skb->encapsulation = 1;
>   skb_set_network_header(skb, mac_len);
> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> index 77d7c1b..d791596 100644
> --- a/tools/include/uapi/linux/bpf.h
> +++ b/tools/include/uapi/linux/bpf.h
> @@ -1751,6 +1751,10 @@ struct bpf_stack_build_id {
>  *  Use with ENCAP_L3/L4 flags to further specify the tunnel
>  *  type; *len* is the length of the inner MAC header.
>  *
> + *   * **BPF_F_ADJ_ROOM_ENCAP_L2_ETH**:
> + * Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the
> + * L2 type as Ethernet.
> + *
>  *A call to this helper is susceptible to change the underlying
>  *packet buffer. Therefore, at load time, all checks on pointers
>  *previously done by the veri

[PATCH/v4] bpf: add bpf_skb_adjust_room flag BPF_F_ADJ_ROOM_ENCAP_L2_ETH

2021-03-03 Thread Xuesen Huang
From: Xuesen Huang 

bpf_skb_adjust_room sets inner_protocol to skb->protocol when encapsulating
packets, but that is not appropriate when pushing an Ethernet header.

Add an option to further specify the encap L2 type and set inner_protocol
to ETH_P_TEB.

Update test_tc_tunnel to verify adding vxlan encapsulation works with
this flag.

Suggested-by: Willem de Bruijn 
Signed-off-by: Xuesen Huang 
Signed-off-by: Zhiyong Cheng 
Signed-off-by: Li Wang 
---
 include/uapi/linux/bpf.h   |   5 +
 net/core/filter.c  |  11 ++-
 tools/include/uapi/linux/bpf.h |   5 +
 tools/testing/selftests/bpf/progs/test_tc_tunnel.c | 107 ++---
 tools/testing/selftests/bpf/test_tc_tunnel.sh  |  15 ++-
 5 files changed, 124 insertions(+), 19 deletions(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 77d7c1b..d791596 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1751,6 +1751,10 @@ struct bpf_stack_build_id {
  *   Use with ENCAP_L3/L4 flags to further specify the tunnel
  *   type; *len* is the length of the inner MAC header.
  *
+ * * **BPF_F_ADJ_ROOM_ENCAP_L2_ETH**:
+ *   Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the
+ *   L2 type as Ethernet.
+ *
  * A call to this helper is susceptible to change the underlying
  * packet buffer. Therefore, at load time, all checks on pointers
  * previously done by the verifier are invalidated and must be
@@ -4088,6 +4092,7 @@ enum {
BPF_F_ADJ_ROOM_ENCAP_L4_GRE = (1ULL << 3),
BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4),
BPF_F_ADJ_ROOM_NO_CSUM_RESET= (1ULL << 5),
+   BPF_F_ADJ_ROOM_ENCAP_L2_ETH = (1ULL << 6),
 };
 
 enum {
diff --git a/net/core/filter.c b/net/core/filter.c
index 255aeee..8d1fb61 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3412,6 +3412,7 @@ static u32 bpf_skb_net_base_len(const struct sk_buff *skb)
 BPF_F_ADJ_ROOM_ENCAP_L3_MASK | \
 BPF_F_ADJ_ROOM_ENCAP_L4_GRE | \
 BPF_F_ADJ_ROOM_ENCAP_L4_UDP | \
+BPF_F_ADJ_ROOM_ENCAP_L2_ETH | \
 BPF_F_ADJ_ROOM_ENCAP_L2( \
  BPF_ADJ_ROOM_ENCAP_L2_MASK))
 
@@ -3448,6 +3449,10 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 
off, u32 len_diff,
flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP)
return -EINVAL;
 
+   if (flags & BPF_F_ADJ_ROOM_ENCAP_L2_ETH &&
+   inner_mac_len < ETH_HLEN)
+   return -EINVAL;
+
if (skb->encapsulation)
return -EALREADY;
 
@@ -3466,7 +3471,11 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 
off, u32 len_diff,
skb->inner_mac_header = inner_net - inner_mac_len;
skb->inner_network_header = inner_net;
skb->inner_transport_header = inner_trans;
-   skb_set_inner_protocol(skb, skb->protocol);
+
+   if (flags & BPF_F_ADJ_ROOM_ENCAP_L2_ETH)
+   skb_set_inner_protocol(skb, htons(ETH_P_TEB));
+   else
+   skb_set_inner_protocol(skb, skb->protocol);
 
skb->encapsulation = 1;
skb_set_network_header(skb, mac_len);
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 77d7c1b..d791596 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1751,6 +1751,10 @@ struct bpf_stack_build_id {
  *   Use with ENCAP_L3/L4 flags to further specify the tunnel
  *   type; *len* is the length of the inner MAC header.
  *
+ * * **BPF_F_ADJ_ROOM_ENCAP_L2_ETH**:
+ *   Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the
+ *   L2 type as Ethernet.
+ *
  * A call to this helper is susceptible to change the underlying
  * packet buffer. Therefore, at load time, all checks on pointers
  * previously done by the verifier are invalidated and must be
@@ -4088,6 +4092,7 @@ enum {
BPF_F_ADJ_ROOM_ENCAP_L4_GRE = (1ULL << 3),
BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4),
BPF_F_ADJ_ROOM_NO_CSUM_RESET= (1ULL << 5),
+   BPF_F_ADJ_ROOM_ENCAP_L2_ETH = (1ULL << 6),
 };
 
 enum {
diff --git a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c 
b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
index 37bce7a..6e144db 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c

[PATCH/v3] bpf: add bpf_skb_adjust_room flag BPF_F_ADJ_ROOM_ENCAP_L2_ETH

2021-02-25 Thread Xuesen Huang
From: Xuesen Huang 

bpf_skb_adjust_room sets inner_protocol to skb->protocol when encapsulating
packets, but that is not appropriate when pushing an Ethernet header.

Add an option to further specify the encap L2 type and set inner_protocol
to ETH_P_TEB.

v3:
- Fix the code format.

v2:
Suggested-by: Willem de Bruijn 
- Add a new flag to specify the type of the inner packet.

Suggested-by: Willem de Bruijn 
Signed-off-by: Xuesen Huang 
Signed-off-by: Zhiyong Cheng 
Signed-off-by: Li Wang 
---
 include/uapi/linux/bpf.h   |  5 +
 net/core/filter.c  | 11 ++-
 tools/include/uapi/linux/bpf.h |  5 +
 3 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 77d7c1b..d791596 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1751,6 +1751,10 @@ struct bpf_stack_build_id {
  *   Use with ENCAP_L3/L4 flags to further specify the tunnel
  *   type; *len* is the length of the inner MAC header.
  *
+ * * **BPF_F_ADJ_ROOM_ENCAP_L2_ETH**:
+ *   Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the
+ *   L2 type as Ethernet.
+ *
  * A call to this helper is susceptible to change the underlying
  * packet buffer. Therefore, at load time, all checks on pointers
  * previously done by the verifier are invalidated and must be
@@ -4088,6 +4092,7 @@ enum {
BPF_F_ADJ_ROOM_ENCAP_L4_GRE = (1ULL << 3),
BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4),
BPF_F_ADJ_ROOM_NO_CSUM_RESET= (1ULL << 5),
+   BPF_F_ADJ_ROOM_ENCAP_L2_ETH = (1ULL << 6),
 };
 
 enum {
diff --git a/net/core/filter.c b/net/core/filter.c
index 255aeee..8d1fb61 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3412,6 +3412,7 @@ static u32 bpf_skb_net_base_len(const struct sk_buff *skb)
 BPF_F_ADJ_ROOM_ENCAP_L3_MASK | \
 BPF_F_ADJ_ROOM_ENCAP_L4_GRE | \
 BPF_F_ADJ_ROOM_ENCAP_L4_UDP | \
+BPF_F_ADJ_ROOM_ENCAP_L2_ETH | \
 BPF_F_ADJ_ROOM_ENCAP_L2( \
  BPF_ADJ_ROOM_ENCAP_L2_MASK))
 
@@ -3448,6 +3449,10 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 
off, u32 len_diff,
flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP)
return -EINVAL;
 
+   if (flags & BPF_F_ADJ_ROOM_ENCAP_L2_ETH &&
+   inner_mac_len < ETH_HLEN)
+   return -EINVAL;
+
if (skb->encapsulation)
return -EALREADY;
 
@@ -3466,7 +3471,11 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 
off, u32 len_diff,
skb->inner_mac_header = inner_net - inner_mac_len;
skb->inner_network_header = inner_net;
skb->inner_transport_header = inner_trans;
-   skb_set_inner_protocol(skb, skb->protocol);
+
+   if (flags & BPF_F_ADJ_ROOM_ENCAP_L2_ETH)
+   skb_set_inner_protocol(skb, htons(ETH_P_TEB));
+   else
+   skb_set_inner_protocol(skb, skb->protocol);
 
skb->encapsulation = 1;
skb_set_network_header(skb, mac_len);
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 77d7c1b..d791596 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1751,6 +1751,10 @@ struct bpf_stack_build_id {
  *   Use with ENCAP_L3/L4 flags to further specify the tunnel
  *   type; *len* is the length of the inner MAC header.
  *
+ * * **BPF_F_ADJ_ROOM_ENCAP_L2_ETH**:
+ *   Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the
+ *   L2 type as Ethernet.
+ *
  * A call to this helper is susceptible to change the underlying
  * packet buffer. Therefore, at load time, all checks on pointers
  * previously done by the verifier are invalidated and must be
@@ -4088,6 +4092,7 @@ enum {
BPF_F_ADJ_ROOM_ENCAP_L4_GRE = (1ULL << 3),
BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4),
BPF_F_ADJ_ROOM_NO_CSUM_RESET= (1ULL << 5),
+   BPF_F_ADJ_ROOM_ENCAP_L2_ETH = (1ULL << 6),
 };
 
 enum {
-- 
1.8.3.1



[PATCH/v2] bpf: add bpf_skb_adjust_room flag BPF_F_ADJ_ROOM_ENCAP_L2_ETH

2021-02-18 Thread Xuesen Huang
From: Xuesen Huang 

bpf_skb_adjust_room sets inner_protocol to skb->protocol when encapsulating
packets, but that is not appropriate when pushing an Ethernet header.

Add an option to further specify the encap L2 type and set inner_protocol
to ETH_P_TEB.

Suggested-by: Willem de Bruijn 
Signed-off-by: Xuesen Huang 
Signed-off-by: Zhiyong Cheng 
Signed-off-by: Li Wang 
---
 include/uapi/linux/bpf.h   |  5 +
 net/core/filter.c  | 11 ++-
 tools/include/uapi/linux/bpf.h |  5 +
 3 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 77d7c1b..d791596 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1751,6 +1751,10 @@ struct bpf_stack_build_id {
  *   Use with ENCAP_L3/L4 flags to further specify the tunnel
  *   type; *len* is the length of the inner MAC header.
  *
+ * * **BPF_F_ADJ_ROOM_ENCAP_L2_ETH**:
+ *   Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the
+ *   L2 type as Ethernet.
+ *
  * A call to this helper is susceptible to change the underlying
  * packet buffer. Therefore, at load time, all checks on pointers
  * previously done by the verifier are invalidated and must be
@@ -4088,6 +4092,7 @@ enum {
BPF_F_ADJ_ROOM_ENCAP_L4_GRE = (1ULL << 3),
BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4),
BPF_F_ADJ_ROOM_NO_CSUM_RESET= (1ULL << 5),
+   BPF_F_ADJ_ROOM_ENCAP_L2_ETH = (1ULL << 6),
 };
 
 enum {
diff --git a/net/core/filter.c b/net/core/filter.c
index 255aeee..8d1fb61 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3412,6 +3412,7 @@ static u32 bpf_skb_net_base_len(const struct sk_buff *skb)
 BPF_F_ADJ_ROOM_ENCAP_L3_MASK | \
 BPF_F_ADJ_ROOM_ENCAP_L4_GRE | \
 BPF_F_ADJ_ROOM_ENCAP_L4_UDP | \
+BPF_F_ADJ_ROOM_ENCAP_L2_ETH | \
 BPF_F_ADJ_ROOM_ENCAP_L2( \
  BPF_ADJ_ROOM_ENCAP_L2_MASK))
 
@@ -3448,6 +3449,10 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 
off, u32 len_diff,
flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP)
return -EINVAL;
 
+   if (flags & BPF_F_ADJ_ROOM_ENCAP_L2_ETH &&
+   inner_mac_len < ETH_HLEN)
+   return -EINVAL;
+
if (skb->encapsulation)
return -EALREADY;
 
@@ -3466,7 +3471,11 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 
off, u32 len_diff,
skb->inner_mac_header = inner_net - inner_mac_len;
skb->inner_network_header = inner_net;
skb->inner_transport_header = inner_trans;
-   skb_set_inner_protocol(skb, skb->protocol);
+
+   if (flags & BPF_F_ADJ_ROOM_ENCAP_L2_ETH)
+   skb_set_inner_protocol(skb, htons(ETH_P_TEB));
+   else
+   skb_set_inner_protocol(skb, skb->protocol);
 
skb->encapsulation = 1;
skb_set_network_header(skb, mac_len);
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 77d7c1b..d791596 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1751,6 +1751,10 @@ struct bpf_stack_build_id {
  *   Use with ENCAP_L3/L4 flags to further specify the tunnel
  *   type; *len* is the length of the inner MAC header.
  *
+ * * **BPF_F_ADJ_ROOM_ENCAP_L2_ETH**:
+ *   Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the
+ *   L2 type as Ethernet.
+ *
  * A call to this helper is susceptible to change the underlying
  * packet buffer. Therefore, at load time, all checks on pointers
  * previously done by the verifier are invalidated and must be
@@ -4088,6 +4092,7 @@ enum {
BPF_F_ADJ_ROOM_ENCAP_L4_GRE = (1ULL << 3),
BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4),
BPF_F_ADJ_ROOM_NO_CSUM_RESET= (1ULL << 5),
+   BPF_F_ADJ_ROOM_ENCAP_L2_ETH = (1ULL << 6),
 };
 
 enum {
-- 
1.8.3.1