Re: [PATCH/v4] bpf: add bpf_skb_adjust_room flag BPF_F_ADJ_ROOM_ENCAP_L2_ETH

2021-03-03 Thread Willem de Bruijn
> > Instead of untyped macros, I'd define encap_ipv4 as a function that
> > calls __encap_ipv4.
> >
> > And no need for encap_ipv4_with_ext_proto equivalent to __encap_ipv4.
> >
> I defined these macros to try to keep the existing  invocation for 
> encap_ipv4/6
> as the same, if we define this as a function all invocation should be 
> modified?

You can leave the existing invocations the same and make the new
callers caller __encap_ipv4 directly, which takes one extra argument?
Adding a __ prefixed variant with extra args is a common pattern.

> >>/* add L2 encap (if specified) */
> >> +   l2_hdr = (__u8 *)_outer + olen;
> >>switch (l2_proto) {
> >>case ETH_P_MPLS_UC:
> >> -   *((__u32 *)((__u8 *)_outer + olen)) = mpls_label;
> >> +   *(__u32 *)l2_hdr = mpls_label;
> >>break;
> >>case ETH_P_TEB:
> >> -   if (bpf_skb_load_bytes(skb, 0, (__u8 *)_outer + olen,
> >> -  ETH_HLEN))
> >
> > This is non-standard indentation? Here and elsewhere.
> I thinks it’s a previous issue.

Ah right. Bad example. How about in __encap_vxlan_eth

+   return encap_ipv4_with_ext_proto(skb, IPPROTO_UDP,
+   ETH_P_TEB, EXTPROTO_VXLAN);

> >> @@ -278,13 +321,24 @@ static __always_inline int encap_ipv6(struct 
> >> __sk_buff *skb, __u8 encap_proto,
> >>}
> >>
> >>/* add L2 encap (if specified) */
> >> +   l2_hdr = (__u8 *)_outer + olen;
> >>switch (l2_proto) {
> >>case ETH_P_MPLS_UC:
> >> -   *((__u32 *)((__u8 *)_outer + olen)) = mpls_label;
> >> +   *(__u32 *)l2_hdr = mpls_label;
> >>break;
> >>case ETH_P_TEB:
> >> -   if (bpf_skb_load_bytes(skb, 0, (__u8 *)_outer + olen,
> >> -  ETH_HLEN))
> >> +   flags |= BPF_F_ADJ_ROOM_ENCAP_L2_ETH;
> >
> > This is a change also for the existing case. Correctly so, I imagine.
> > But the test used to pass with the wrong protocol?
> Yes all tests pass. I’m not sure should we add this flag for the existing 
> tests
> which encap eth as the l2 header or only for the Vxlan test?

It is correct in both cases. If it does not break anything, I would do both.

Thanks,

  Willem


Re: [PATCH/v4] bpf: add bpf_skb_adjust_room flag BPF_F_ADJ_ROOM_ENCAP_L2_ETH

2021-03-03 Thread Xuesen Huang



> 2021年3月4日 上午2:53,Willem de Bruijn  写道:
> 
> On Wed, Mar 3, 2021 at 7:33 AM Xuesen Huang  wrote:
>> 
>> From: Xuesen Huang 
>> 
>> bpf_skb_adjust_room sets the inner_protocol as skb->protocol for packets
>> encapsulation. But that is not appropriate when pushing Ethernet header.
>> 
>> Add an option to further specify encap L2 type and set the inner_protocol
>> as ETH_P_TEB.
>> 
>> Update test_tc_tunnel to verify adding vxlan encapsulation works with
>> this flag.
>> 
>> Suggested-by: Willem de Bruijn 
>> Signed-off-by: Xuesen Huang 
>> Signed-off-by: Zhiyong Cheng 
>> Signed-off-by: Li Wang 
> 
> Thanks for adding the test. Perhaps that is better in a separate patch?
> 
> Overall looks great to me.
> 
> The patch has not (yet?) arrived on patchwork.
> 
Thanks Willem, I will separate it into two patch.

I will send patch/v5 with only that new flag addition, lol.

>> enum {
>> diff --git a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c 
>> b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
>> index 37bce7a..6e144db 100644
>> --- a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
>> +++ b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
>> @@ -20,6 +20,14 @@
>> #include 
>> #include 
>> 
>> +#define encap_ipv4(...) __encap_ipv4(__VA_ARGS__, 0)
>> +
>> +#define encap_ipv4_with_ext_proto(...) __encap_ipv4(__VA_ARGS__)
>> +
>> +#define encap_ipv6(...) __encap_ipv6(__VA_ARGS__, 0)
>> +
>> +#define encap_ipv6_with_ext_proto(...) __encap_ipv6(__VA_ARGS__)
>> +
> 
> Instead of untyped macros, I'd define encap_ipv4 as a function that
> calls __encap_ipv4.
> 
> And no need for encap_ipv4_with_ext_proto equivalent to __encap_ipv4.
> 
I defined these macros to try to keep the existing  invocation for encap_ipv4/6
as the same, if we define this as a function all invocation should be modified?

>> static const int cfg_port = 8000;
>> 
>> static const int cfg_udp_src = 2;
>> @@ -27,11 +35,24 @@
>> #defineUDP_PORT
>> #defineMPLS_OVER_UDP_PORT  6635
>> #defineETH_OVER_UDP_PORT   
>> +#defineVXLAN_UDP_PORT  8472
>> +
>> +#defineEXTPROTO_VXLAN  0x1
>> +
>> +#defineVXLAN_N_VID (1u << 24)
>> +#defineVXLAN_VNI_MASK  bpf_htonl((VXLAN_N_VID - 1) << 8)
>> +#defineVXLAN_FLAGS 0x8
>> +#defineVXLAN_VNI   1
>> 
>> /* MPLS label 1000 with S bit (last label) set and ttl of 255. */
>> static const __u32 mpls_label = __bpf_constant_htonl(1000 << 12 |
>> MPLS_LS_S_MASK | 0xff);
>> 
>> +struct vxlanhdr {
>> +   __be32 vx_flags;
>> +   __be32 vx_vni;
>> +} __attribute__((packed));
>> +
>> struct gre_hdr {
>>__be16 flags;
>>__be16 protocol;
>> @@ -45,13 +66,13 @@ struct gre_hdr {
>> struct v4hdr {
>>struct iphdr ip;
>>union l4hdr l4hdr;
>> -   __u8 pad[16];   /* enough space for L2 header */
>> +   __u8 pad[24];   /* space for L2 header / vxlan 
>> header ... */
> 
> could we use something like sizeof(..) instead of a constant?
> 
Thanks, I will try to fix this.

>> @@ -171,14 +197,26 @@ static __always_inline int encap_ipv4(struct __sk_buff 
>> *skb, __u8 encap_proto,
>>}
>> 
>>/* add L2 encap (if specified) */
>> +   l2_hdr = (__u8 *)_outer + olen;
>>switch (l2_proto) {
>>case ETH_P_MPLS_UC:
>> -   *((__u32 *)((__u8 *)_outer + olen)) = mpls_label;
>> +   *(__u32 *)l2_hdr = mpls_label;
>>break;
>>case ETH_P_TEB:
>> -   if (bpf_skb_load_bytes(skb, 0, (__u8 *)_outer + olen,
>> -  ETH_HLEN))
> 
> This is non-standard indentation? Here and elsewhere.
I thinks it’s a previous issue.

> 
>> @@ -249,7 +288,11 @@ static __always_inline int encap_ipv6(struct __sk_buff 
>> *skb, __u8 encap_proto,
>>break;
>>case ETH_P_TEB:
>>l2_len = ETH_HLEN;
>> -   udp_dst = ETH_OVER_UDP_PORT;
>> +   if (ext_proto & EXTPROTO_VXLAN) {
>> +   udp_dst = VXLAN_UDP_PORT;
>> +   l2_len += sizeof(struct vxlanhdr);
>> +   } else
>> +   udp_dst = ETH_OVER_UDP_PORT;
>>break;
>>}
>>flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);
>> @@ -267,7 +310,7 @@ static __always_inline int encap_ipv6(struct __sk_buff 
>> *skb, __u8 encap_proto,
>>h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src);
>>h_outer.l4hdr.udp.dest = bpf_htons(udp_dst);
>>tot_len = bpf_ntohs(iph_inner.payload_len) + 
>> sizeof(iph_inner) +
>> - sizeof(h_outer.l4hdr.udp);
>> + sizeof(h_outer.l4hdr.udp) + l2_len;
> 
> Was this a bug previously?
> 
Yes, a tiny bug.

>>h_outer.l4hdr.udp.check = 0;
>>h_outer.l4hdr.udp.len = 

Re: [PATCH/v4] bpf: add bpf_skb_adjust_room flag BPF_F_ADJ_ROOM_ENCAP_L2_ETH

2021-03-03 Thread Willem de Bruijn
On Wed, Mar 3, 2021 at 7:33 AM Xuesen Huang  wrote:
>
> From: Xuesen Huang 
>
> bpf_skb_adjust_room sets the inner_protocol as skb->protocol for packets
> encapsulation. But that is not appropriate when pushing Ethernet header.
>
> Add an option to further specify encap L2 type and set the inner_protocol
> as ETH_P_TEB.
>
> Update test_tc_tunnel to verify adding vxlan encapsulation works with
> this flag.
>
> Suggested-by: Willem de Bruijn 
> Signed-off-by: Xuesen Huang 
> Signed-off-by: Zhiyong Cheng 
> Signed-off-by: Li Wang 

Thanks for adding the test. Perhaps that is better in a separate patch?

Overall looks great to me.

The patch has not (yet?) arrived on patchwork.

>  enum {
> diff --git a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c 
> b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
> index 37bce7a..6e144db 100644
> --- a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
> +++ b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
> @@ -20,6 +20,14 @@
>  #include 
>  #include 
>
> +#define encap_ipv4(...) __encap_ipv4(__VA_ARGS__, 0)
> +
> +#define encap_ipv4_with_ext_proto(...) __encap_ipv4(__VA_ARGS__)
> +
> +#define encap_ipv6(...) __encap_ipv6(__VA_ARGS__, 0)
> +
> +#define encap_ipv6_with_ext_proto(...) __encap_ipv6(__VA_ARGS__)
> +

Instead of untyped macros, I'd define encap_ipv4 as a function that
calls __encap_ipv4.

And no need for encap_ipv4_with_ext_proto equivalent to __encap_ipv4.

>  static const int cfg_port = 8000;
>
>  static const int cfg_udp_src = 2;
> @@ -27,11 +35,24 @@
>  #defineUDP_PORT
>  #defineMPLS_OVER_UDP_PORT  6635
>  #defineETH_OVER_UDP_PORT   
> +#defineVXLAN_UDP_PORT  8472
> +
> +#defineEXTPROTO_VXLAN  0x1
> +
> +#defineVXLAN_N_VID (1u << 24)
> +#defineVXLAN_VNI_MASK  bpf_htonl((VXLAN_N_VID - 1) << 8)
> +#defineVXLAN_FLAGS 0x8
> +#defineVXLAN_VNI   1
>
>  /* MPLS label 1000 with S bit (last label) set and ttl of 255. */
>  static const __u32 mpls_label = __bpf_constant_htonl(1000 << 12 |
>  MPLS_LS_S_MASK | 0xff);
>
> +struct vxlanhdr {
> +   __be32 vx_flags;
> +   __be32 vx_vni;
> +} __attribute__((packed));
> +
>  struct gre_hdr {
> __be16 flags;
> __be16 protocol;
> @@ -45,13 +66,13 @@ struct gre_hdr {
>  struct v4hdr {
> struct iphdr ip;
> union l4hdr l4hdr;
> -   __u8 pad[16];   /* enough space for L2 header */
> +   __u8 pad[24];   /* space for L2 header / vxlan header 
> ... */

could we use something like sizeof(..) instead of a constant?

> @@ -171,14 +197,26 @@ static __always_inline int encap_ipv4(struct __sk_buff 
> *skb, __u8 encap_proto,
> }
>
> /* add L2 encap (if specified) */
> +   l2_hdr = (__u8 *)_outer + olen;
> switch (l2_proto) {
> case ETH_P_MPLS_UC:
> -   *((__u32 *)((__u8 *)_outer + olen)) = mpls_label;
> +   *(__u32 *)l2_hdr = mpls_label;
> break;
> case ETH_P_TEB:
> -   if (bpf_skb_load_bytes(skb, 0, (__u8 *)_outer + olen,
> -  ETH_HLEN))

This is non-standard indentation? Here and elsewhere.

> @@ -249,7 +288,11 @@ static __always_inline int encap_ipv6(struct __sk_buff 
> *skb, __u8 encap_proto,
> break;
> case ETH_P_TEB:
> l2_len = ETH_HLEN;
> -   udp_dst = ETH_OVER_UDP_PORT;
> +   if (ext_proto & EXTPROTO_VXLAN) {
> +   udp_dst = VXLAN_UDP_PORT;
> +   l2_len += sizeof(struct vxlanhdr);
> +   } else
> +   udp_dst = ETH_OVER_UDP_PORT;
> break;
> }
> flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);
> @@ -267,7 +310,7 @@ static __always_inline int encap_ipv6(struct __sk_buff 
> *skb, __u8 encap_proto,
> h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src);
> h_outer.l4hdr.udp.dest = bpf_htons(udp_dst);
> tot_len = bpf_ntohs(iph_inner.payload_len) + 
> sizeof(iph_inner) +
> - sizeof(h_outer.l4hdr.udp);
> + sizeof(h_outer.l4hdr.udp) + l2_len;

Was this a bug previously?

> h_outer.l4hdr.udp.check = 0;
> h_outer.l4hdr.udp.len = bpf_htons(tot_len);
> break;
> @@ -278,13 +321,24 @@ static __always_inline int encap_ipv6(struct __sk_buff 
> *skb, __u8 encap_proto,
> }
>
> /* add L2 encap (if specified) */
> +   l2_hdr = (__u8 *)_outer + olen;
> switch (l2_proto) {
> case ETH_P_MPLS_UC:
> -   *((__u32 *)((__u8 *)_outer + olen)) = mpls_label;
> +   *(__u32 *)l2_hdr = mpls_label;
> break;
> case ETH_P_TEB:
> -   if (bpf_skb_load_bytes(skb, 0, (__u8 

Re: [PATCH/v4] bpf: add bpf_skb_adjust_room flag BPF_F_ADJ_ROOM_ENCAP_L2_ETH

2021-03-03 Thread Xuesen Huang
Thanks Cong!

Thanks to your suggestion, I try to add a simple test case to test_tc_tunnel. 
It works 
for me :)

Thanks for your review.

> 2021年3月3日 下午8:33,Xuesen Huang  写道:
> 
> From: Xuesen Huang 
> 
> bpf_skb_adjust_room sets the inner_protocol as skb->protocol for packets
> encapsulation. But that is not appropriate when pushing Ethernet header.
> 
> Add an option to further specify encap L2 type and set the inner_protocol
> as ETH_P_TEB.
> 
> Update test_tc_tunnel to verify adding vxlan encapsulation works with
> this flag.
> 
> Suggested-by: Willem de Bruijn 
> Signed-off-by: Xuesen Huang 
> Signed-off-by: Zhiyong Cheng 
> Signed-off-by: Li Wang 
> ---
> include/uapi/linux/bpf.h   |   5 +
> net/core/filter.c  |  11 ++-
> tools/include/uapi/linux/bpf.h |   5 +
> tools/testing/selftests/bpf/progs/test_tc_tunnel.c | 107 ++---
> tools/testing/selftests/bpf/test_tc_tunnel.sh  |  15 ++-
> 5 files changed, 124 insertions(+), 19 deletions(-)
> 
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 77d7c1b..d791596 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -1751,6 +1751,10 @@ struct bpf_stack_build_id {
>  *  Use with ENCAP_L3/L4 flags to further specify the tunnel
>  *  type; *len* is the length of the inner MAC header.
>  *
> + *   * **BPF_F_ADJ_ROOM_ENCAP_L2_ETH**:
> + * Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the
> + * L2 type as Ethernet.
> + *
>  *A call to this helper is susceptible to change the underlying
>  *packet buffer. Therefore, at load time, all checks on pointers
>  *previously done by the verifier are invalidated and must be
> @@ -4088,6 +4092,7 @@ enum {
>   BPF_F_ADJ_ROOM_ENCAP_L4_GRE = (1ULL << 3),
>   BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4),
>   BPF_F_ADJ_ROOM_NO_CSUM_RESET= (1ULL << 5),
> + BPF_F_ADJ_ROOM_ENCAP_L2_ETH = (1ULL << 6),
> };
> 
> enum {
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 255aeee..8d1fb61 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -3412,6 +3412,7 @@ static u32 bpf_skb_net_base_len(const struct sk_buff 
> *skb)
>BPF_F_ADJ_ROOM_ENCAP_L3_MASK | \
>BPF_F_ADJ_ROOM_ENCAP_L4_GRE | \
>BPF_F_ADJ_ROOM_ENCAP_L4_UDP | \
> +  BPF_F_ADJ_ROOM_ENCAP_L2_ETH | \
>BPF_F_ADJ_ROOM_ENCAP_L2( \
> BPF_ADJ_ROOM_ENCAP_L2_MASK))
> 
> @@ -3448,6 +3449,10 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 
> off, u32 len_diff,
>   flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP)
>   return -EINVAL;
> 
> + if (flags & BPF_F_ADJ_ROOM_ENCAP_L2_ETH &&
> + inner_mac_len < ETH_HLEN)
> + return -EINVAL;
> +
>   if (skb->encapsulation)
>   return -EALREADY;
> 
> @@ -3466,7 +3471,11 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 
> off, u32 len_diff,
>   skb->inner_mac_header = inner_net - inner_mac_len;
>   skb->inner_network_header = inner_net;
>   skb->inner_transport_header = inner_trans;
> - skb_set_inner_protocol(skb, skb->protocol);
> +
> + if (flags & BPF_F_ADJ_ROOM_ENCAP_L2_ETH)
> + skb_set_inner_protocol(skb, htons(ETH_P_TEB));
> + else
> + skb_set_inner_protocol(skb, skb->protocol);
> 
>   skb->encapsulation = 1;
>   skb_set_network_header(skb, mac_len);
> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> index 77d7c1b..d791596 100644
> --- a/tools/include/uapi/linux/bpf.h
> +++ b/tools/include/uapi/linux/bpf.h
> @@ -1751,6 +1751,10 @@ struct bpf_stack_build_id {
>  *  Use with ENCAP_L3/L4 flags to further specify the tunnel
>  *  type; *len* is the length of the inner MAC header.
>  *
> + *   * **BPF_F_ADJ_ROOM_ENCAP_L2_ETH**:
> + * Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the
> + * L2 type as Ethernet.
> + *
>  *A call to this helper is susceptible to change the underlying
>  *packet buffer. Therefore, at load time, all checks on pointers
>  *previously done by the verifier are invalidated and must be
> @@ -4088,6 +4092,7 @@ enum {
>   BPF_F_ADJ_ROOM_ENCAP_L4_GRE = (1ULL << 3),
>   BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4),
>   BPF_F_ADJ_ROOM_NO_CSUM_RESET= (1ULL << 5),
> + BPF_F_ADJ_ROOM_ENCAP_L2_ETH = (1ULL << 6),
> };
> 
> enum {
> diff --git a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c 
> 

[PATCH/v4] bpf: add bpf_skb_adjust_room flag BPF_F_ADJ_ROOM_ENCAP_L2_ETH

2021-03-03 Thread Xuesen Huang
From: Xuesen Huang 

bpf_skb_adjust_room sets the inner_protocol as skb->protocol for packets
encapsulation. But that is not appropriate when pushing Ethernet header.

Add an option to further specify encap L2 type and set the inner_protocol
as ETH_P_TEB.

Update test_tc_tunnel to verify adding vxlan encapsulation works with
this flag.

Suggested-by: Willem de Bruijn 
Signed-off-by: Xuesen Huang 
Signed-off-by: Zhiyong Cheng 
Signed-off-by: Li Wang 
---
 include/uapi/linux/bpf.h   |   5 +
 net/core/filter.c  |  11 ++-
 tools/include/uapi/linux/bpf.h |   5 +
 tools/testing/selftests/bpf/progs/test_tc_tunnel.c | 107 ++---
 tools/testing/selftests/bpf/test_tc_tunnel.sh  |  15 ++-
 5 files changed, 124 insertions(+), 19 deletions(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 77d7c1b..d791596 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1751,6 +1751,10 @@ struct bpf_stack_build_id {
  *   Use with ENCAP_L3/L4 flags to further specify the tunnel
  *   type; *len* is the length of the inner MAC header.
  *
+ * * **BPF_F_ADJ_ROOM_ENCAP_L2_ETH**:
+ *   Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the
+ *   L2 type as Ethernet.
+ *
  * A call to this helper is susceptible to change the underlying
  * packet buffer. Therefore, at load time, all checks on pointers
  * previously done by the verifier are invalidated and must be
@@ -4088,6 +4092,7 @@ enum {
BPF_F_ADJ_ROOM_ENCAP_L4_GRE = (1ULL << 3),
BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4),
BPF_F_ADJ_ROOM_NO_CSUM_RESET= (1ULL << 5),
+   BPF_F_ADJ_ROOM_ENCAP_L2_ETH = (1ULL << 6),
 };
 
 enum {
diff --git a/net/core/filter.c b/net/core/filter.c
index 255aeee..8d1fb61 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3412,6 +3412,7 @@ static u32 bpf_skb_net_base_len(const struct sk_buff *skb)
 BPF_F_ADJ_ROOM_ENCAP_L3_MASK | \
 BPF_F_ADJ_ROOM_ENCAP_L4_GRE | \
 BPF_F_ADJ_ROOM_ENCAP_L4_UDP | \
+BPF_F_ADJ_ROOM_ENCAP_L2_ETH | \
 BPF_F_ADJ_ROOM_ENCAP_L2( \
  BPF_ADJ_ROOM_ENCAP_L2_MASK))
 
@@ -3448,6 +3449,10 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 
off, u32 len_diff,
flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP)
return -EINVAL;
 
+   if (flags & BPF_F_ADJ_ROOM_ENCAP_L2_ETH &&
+   inner_mac_len < ETH_HLEN)
+   return -EINVAL;
+
if (skb->encapsulation)
return -EALREADY;
 
@@ -3466,7 +3471,11 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 
off, u32 len_diff,
skb->inner_mac_header = inner_net - inner_mac_len;
skb->inner_network_header = inner_net;
skb->inner_transport_header = inner_trans;
-   skb_set_inner_protocol(skb, skb->protocol);
+
+   if (flags & BPF_F_ADJ_ROOM_ENCAP_L2_ETH)
+   skb_set_inner_protocol(skb, htons(ETH_P_TEB));
+   else
+   skb_set_inner_protocol(skb, skb->protocol);
 
skb->encapsulation = 1;
skb_set_network_header(skb, mac_len);
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 77d7c1b..d791596 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1751,6 +1751,10 @@ struct bpf_stack_build_id {
  *   Use with ENCAP_L3/L4 flags to further specify the tunnel
  *   type; *len* is the length of the inner MAC header.
  *
+ * * **BPF_F_ADJ_ROOM_ENCAP_L2_ETH**:
+ *   Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the
+ *   L2 type as Ethernet.
+ *
  * A call to this helper is susceptible to change the underlying
  * packet buffer. Therefore, at load time, all checks on pointers
  * previously done by the verifier are invalidated and must be
@@ -4088,6 +4092,7 @@ enum {
BPF_F_ADJ_ROOM_ENCAP_L4_GRE = (1ULL << 3),
BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4),
BPF_F_ADJ_ROOM_NO_CSUM_RESET= (1ULL << 5),
+   BPF_F_ADJ_ROOM_ENCAP_L2_ETH = (1ULL << 6),
 };
 
 enum {
diff --git a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c 
b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
index 37bce7a..6e144db 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
@@ -20,6 +20,14 @@
 #include 
 #include 
 
+#define encap_ipv4(...) __encap_ipv4(__VA_ARGS__, 0)
+