The tc_tunnel selftest only partially validated decap state and missed
some tunnel cases. In particular, the IPXIP decap flags were never
exercised on the IPIP/SIT paths, and skb->encapsulation was not
verified after decapsulating non-GSO packets.

Tighten the test by:

- setting DECAP_IPXIP4/6 flags for IPIP/SIT/IP6 decap paths based on
  the outer tunnel header family;
- requiring needed DECAP enum values via CO-RE enum existence checks
  so missing kernel support fails fast;
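- validating post-decap tunnel state for both GSO and non-GSO packets:
  for GSO packets the gso_type bits of the removed tunnel must be
  cleared and skb->encapsulation must be set if and only if tunnel
  gso_type bits remain; for non-GSO packets skb->encapsulation must
  be cleared;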
- removing the forced "ethtool -K veth1 tso off" from the test harness
  so that the GSO validation is actually exercised.

This improves coverage for decap tunnel-state regressions and ensures
sit_none/ipip-style paths are checked correctly.

Signed-off-by: Nick Hudson <[email protected]>
---
 .../selftests/bpf/prog_tests/test_tc_tunnel.c |  1 -
 .../selftests/bpf/progs/test_tc_tunnel.c      | 91 +++++++++++++++++--
 2 files changed, 84 insertions(+), 8 deletions(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/test_tc_tunnel.c b/tools/testing/selftests/bpf/prog_tests/test_tc_tunnel.c
index 1aa7c9463980..67ba27d69347 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_tc_tunnel.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_tc_tunnel.c
@@ -438,7 +438,6 @@ static int setup(void)
        SYS(fail_close_ns_client, "ip link add %s type veth peer name %s",
            "veth1 mtu 1500 netns " CLIENT_NS " address " MAC_ADDR_VETH1,
            "veth2 mtu 1500 netns " SERVER_NS " address " MAC_ADDR_VETH2);
-       SYS(fail_close_ns_client, "ethtool -K veth1 tso off");
        SYS(fail_close_ns_client, "ip link set veth1 up");
        nstoken_server = open_netns(SERVER_NS);
        if (!ASSERT_OK_PTR(nstoken_server, "open server ns"))
diff --git a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
index 7376df405a6b..853bca962910 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
@@ -6,6 +6,7 @@
 
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_endian.h>
+#include <bpf/bpf_core_read.h>
 #include "bpf_tracing_net.h"
 #include "bpf_compiler.h"
 
@@ -37,6 +38,22 @@ struct vxlanhdr___local {
 
 #define        EXTPROTO_VXLAN  0x1
 
+#define SKB_GSO_UDP_TUNNEL_MASK        (SKB_GSO_UDP_TUNNEL |                   \
+                                SKB_GSO_UDP_TUNNEL_CSUM)
+
+#define SKB_GSO_TUNNEL_MASK    (SKB_GSO_UDP_TUNNEL_MASK |              \
+                                SKB_GSO_GRE |                          \
+                                SKB_GSO_GRE_CSUM |                     \
+                                SKB_GSO_IPXIP4 |                       \
+                                SKB_GSO_IPXIP6 |                       \
+                                SKB_GSO_ESP)
+
+#define BPF_F_ADJ_ROOM_DECAP_L4_MASK   (BPF_F_ADJ_ROOM_DECAP_L4_UDP |  \
+                                        BPF_F_ADJ_ROOM_DECAP_L4_GRE)
+
+#define BPF_F_ADJ_ROOM_DECAP_IPXIP_MASK        (BPF_F_ADJ_ROOM_DECAP_IPXIP4 |  \
+                                        BPF_F_ADJ_ROOM_DECAP_IPXIP6)
+
 #define        VXLAN_FLAGS     bpf_htonl(1<<27)
 #define        VNI_ID          1
 #define        VXLAN_VNI       bpf_htonl(VNI_ID << 8)
@@ -589,9 +606,12 @@ int __encap_ip6vxlan_eth(struct __sk_buff *skb)
                return TC_ACT_OK;
 }
 
-static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
+static int decap_internal(struct __sk_buff *skb, int off, int len, char proto,
+                         __u64 ipxip_flag)
 {
        __u64 flags = BPF_F_ADJ_ROOM_FIXED_GSO;
+       struct sk_buff *kskb;
+       struct skb_shared_info *shinfo;
        struct ipv6_opt_hdr ip6_opt_hdr;
        struct gre_hdr greh;
        struct udphdr udph;
@@ -599,10 +619,12 @@ static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
 
        switch (proto) {
        case IPPROTO_IPIP:
-               flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV4;
+               flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV4 |
+                        ipxip_flag;
                break;
        case IPPROTO_IPV6:
-               flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV6;
+               flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV6 |
+                        ipxip_flag;
                break;
        case NEXTHDR_DEST:
                if (bpf_skb_load_bytes(skb, off + len, &ip6_opt_hdr,
@@ -610,10 +632,12 @@ static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
                        return TC_ACT_OK;
                switch (ip6_opt_hdr.nexthdr) {
                case IPPROTO_IPIP:
-                       flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV4;
+                       flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV4 |
+                                ipxip_flag;
                        break;
                case IPPROTO_IPV6:
-                       flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV6;
+                       flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV6 |
+                                ipxip_flag;
                        break;
                default:
                        return TC_ACT_OK;
@@ -621,6 +645,11 @@ static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
                break;
        case IPPROTO_GRE:
                olen += sizeof(struct gre_hdr);
+               if (!bpf_core_enum_value_exists(enum bpf_adj_room_flags,
+                                               BPF_F_ADJ_ROOM_DECAP_L4_GRE))
+                       return TC_ACT_SHOT;
+               flags |= BPF_F_ADJ_ROOM_DECAP_L4_GRE;
+
                if (bpf_skb_load_bytes(skb, off + len, &greh, sizeof(greh)) < 0)
                        return TC_ACT_OK;
                switch (bpf_ntohs(greh.protocol)) {
@@ -634,6 +663,10 @@ static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
                break;
        case IPPROTO_UDP:
                olen += sizeof(struct udphdr);
+               if (!bpf_core_enum_value_exists(enum bpf_adj_room_flags,
+                                               BPF_F_ADJ_ROOM_DECAP_L4_UDP))
+                       return TC_ACT_SHOT;
+               flags |= BPF_F_ADJ_ROOM_DECAP_L4_UDP;
                if (bpf_skb_load_bytes(skb, off + len, &udph, sizeof(udph)) < 0)
                        return TC_ACT_OK;
                switch (bpf_ntohs(udph.dest)) {
@@ -655,6 +688,40 @@ static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
        if (bpf_skb_adjust_room(skb, -olen, BPF_ADJ_ROOM_MAC, flags))
                return TC_ACT_SHOT;
 
+       kskb = bpf_cast_to_kern_ctx(skb);
+       shinfo = bpf_core_cast(kskb->head + kskb->end, struct skb_shared_info);
+       if (shinfo->gso_size) {
+               if ((flags & BPF_F_ADJ_ROOM_DECAP_L4_UDP) &&
+                   (shinfo->gso_type & SKB_GSO_UDP_TUNNEL_MASK))
+                       return TC_ACT_SHOT;
+
+               if ((flags & BPF_F_ADJ_ROOM_DECAP_L4_GRE) &&
+                   (shinfo->gso_type & (SKB_GSO_GRE | SKB_GSO_GRE_CSUM)))
+                       return TC_ACT_SHOT;
+
+               if ((flags & BPF_F_ADJ_ROOM_DECAP_IPXIP4) &&
+                   (shinfo->gso_type & SKB_GSO_IPXIP4))
+                       return TC_ACT_SHOT;
+
+               if ((flags & BPF_F_ADJ_ROOM_DECAP_IPXIP6) &&
+                   (shinfo->gso_type & SKB_GSO_IPXIP6))
+                       return TC_ACT_SHOT;
+
+               if (flags & (BPF_F_ADJ_ROOM_DECAP_L4_MASK |
+                            BPF_F_ADJ_ROOM_DECAP_IPXIP_MASK)) {
+                       if ((shinfo->gso_type & SKB_GSO_TUNNEL_MASK) &&
+                           !kskb->encapsulation)
+                               return TC_ACT_SHOT;
+                       if (!(shinfo->gso_type & SKB_GSO_TUNNEL_MASK) &&
+                           kskb->encapsulation)
+                               return TC_ACT_SHOT;
+               }
+       } else if ((flags & (BPF_F_ADJ_ROOM_DECAP_L4_MASK |
+                            BPF_F_ADJ_ROOM_DECAP_IPXIP_MASK)) &&
+                  kskb->encapsulation) {
+               return TC_ACT_SHOT;
+       }
+
        return TC_ACT_OK;
 }
 
@@ -662,6 +729,10 @@ static int decap_ipv4(struct __sk_buff *skb)
 {
        struct iphdr iph_outer;
 
+       if (!bpf_core_enum_value_exists(enum bpf_adj_room_flags,
+                                       BPF_F_ADJ_ROOM_DECAP_IPXIP4))
+               return TC_ACT_SHOT;
+
        if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_outer,
                               sizeof(iph_outer)) < 0)
                return TC_ACT_OK;
@@ -670,19 +741,25 @@ static int decap_ipv4(struct __sk_buff *skb)
                return TC_ACT_OK;
 
        return decap_internal(skb, ETH_HLEN, sizeof(iph_outer),
-                             iph_outer.protocol);
+                             iph_outer.protocol,
+                             BPF_F_ADJ_ROOM_DECAP_IPXIP4);
 }
 
 static int decap_ipv6(struct __sk_buff *skb)
 {
        struct ipv6hdr iph_outer;
 
+       if (!bpf_core_enum_value_exists(enum bpf_adj_room_flags,
+                                       BPF_F_ADJ_ROOM_DECAP_IPXIP6))
+               return TC_ACT_SHOT;
+
        if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_outer,
                               sizeof(iph_outer)) < 0)
                return TC_ACT_OK;
 
        return decap_internal(skb, ETH_HLEN, sizeof(iph_outer),
-                             iph_outer.nexthdr);
+                             iph_outer.nexthdr,
+                             BPF_F_ADJ_ROOM_DECAP_IPXIP6);
 }
 
 SEC("tc")
-- 
2.34.1


Reply via email to