While offloading header modifications to TC, OVS is using {TCA_PEDIT} +
{TCA_CSUM} combination as that it the only way to represent header
rewrite.  However, {TCA_CSUM} is unable to calculate L4 checksums for
IP fragments.

Since TC already applies fragmentation bit masking, this patch simply
needs to prevent these packets from being processed through TC.

Reported-at: https://issues.redhat.com/browse/FDP-545
Signed-off-by: Eelco Chaudron <echau...@redhat.com>
---
v4: - When no mask is present for IP fragment bits do not offload.
v3: - Fixed some comment style issue.
    - Add the nw_frag mask if not set.
v2: - Fixed and added some comments.
    - Use ovs-pcap to compare packets.
---
 lib/netdev-offload-tc.c | 39 +++++++++++++++++
 lib/tc.c                |  5 ++-
 tests/system-traffic.at | 92 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 135 insertions(+), 1 deletion(-)

diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
index 3be1c08d2..3d917e7d9 100644
--- a/lib/netdev-offload-tc.c
+++ b/lib/netdev-offload-tc.c
@@ -1490,6 +1490,31 @@ parse_put_flow_ct_action(struct tc_flower *flower,
         return 0;
 }
 
+/* This function returns true if the tc layer will add a l4 checksum action
+ * for this set action.  Refer to the csum_update_flag() function for
+ * detailed logic.  Note that even the kernel only supports updating TCP,
+ * UDP and ICMPv6.
+ */
+static bool
+tc_will_add_l4_checksum(struct tc_flower *flower, int type)
+{
+    switch (type) {
+    case OVS_KEY_ATTR_IPV4:
+    case OVS_KEY_ATTR_IPV6:
+    case OVS_KEY_ATTR_TCP:
+    case OVS_KEY_ATTR_UDP:
+        switch (flower->key.ip_proto) {
+        case IPPROTO_TCP:
+        case IPPROTO_UDP:
+        case IPPROTO_ICMPV6:
+        case IPPROTO_UDPLITE:
+            return true;
+        }
+        break;
+    }
+    return false;
+}
+
 static int
 parse_put_flow_set_masked_action(struct tc_flower *flower,
                                  struct tc_action *action,
@@ -1522,6 +1547,14 @@ parse_put_flow_set_masked_action(struct tc_flower 
*flower,
         return EOPNOTSUPP;
     }
 
+    if (flower->key.flags & TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT
+        && tc_will_add_l4_checksum(flower, type)) {
+        VLOG_DBG_RL(&rl, "set action type %d not supported on fragments "
+                    "due to checksum limitation", type);
+        ofpbuf_uninit(&set_buf);
+        return EOPNOTSUPP;
+    }
+
     for (i = 0; i < ARRAY_SIZE(set_flower_map[type]); i++) {
         struct netlink_field *f = &set_flower_map[type][i];
 
@@ -2447,6 +2480,12 @@ netdev_tc_flow_put(struct netdev *netdev, struct match 
*match,
             }
 
             mask->nw_frag = 0;
+        } else {
+            /* This scenario should not occur. Currently, all installed IP DP
+             * flows perform a fully masked match on the fragmentation bits.
+             * However, since TC depends on this behavior, we return ENOTSUPP
+             * for now in case this behavior changes in the future. */
+             return EOPNOTSUPP;
         }
 
         if (key->nw_proto == IPPROTO_TCP) {
diff --git a/lib/tc.c b/lib/tc.c
index e55ba3b1b..2510d0a3f 100644
--- a/lib/tc.c
+++ b/lib/tc.c
@@ -2958,7 +2958,10 @@ csum_update_flag(struct tc_flower *flower,
      * eth(dst=<mac>),eth_type(0x0800) actions=set(ipv4(src=<new_ip>))
      * we need to force a more specific flow as this can, for example,
      * need a recalculation of icmp checksum if the packet that passes
-     * is ICMPv6 and tcp checksum if its tcp. */
+     * is ICMPv6 and tcp checksum if its tcp.
+     *
+     * This section of the code must be kept in sync with the pre-check
+     * function in netdev-offload-tc.c, tc_will_add_l4_checksum(). */
 
     switch (htype) {
     case TCA_PEDIT_KEY_EX_HDR_TYPE_IP4:
diff --git a/tests/system-traffic.at b/tests/system-traffic.at
index 202ff0492..16cf9370d 100644
--- a/tests/system-traffic.at
+++ b/tests/system-traffic.at
@@ -2436,6 +2436,98 @@ 
recirc_id(<recirc>),in_port(2),eth_type(0x86dd),ipv6(proto=58,frag=no),icmpv6(ty
 OVS_TRAFFIC_VSWITCHD_STOP
 AT_CLEANUP
 
+AT_SETUP([datapath - mod_nw_src/set_field on IP fragments])
+AT_SKIP_IF([test $HAVE_TCPDUMP = no])
+OVS_TRAFFIC_VSWITCHD_START()
+
+ADD_NAMESPACES(at_ns0, at_ns1)
+
+ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24", 36:b1:ee:7c:01:03)
+ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24", 36:b1:ee:7c:01:02)
+
+AT_DATA([flows.txt], [dnl
+  in_port=ovs-p0,ip,nw_src=10.1.1.1 actions=mod_nw_src=11.1.1.1,ovs-p1
+  in_port=ovs-p0,ipv6,ipv6_src=fc00::1 
actions=set_field:fc00::100->ipv6_src,ovs-p1
+])
+
+AT_CHECK([ovs-ofctl del-flows br0])
+AT_CHECK([ovs-ofctl -Oopenflow13 add-flows br0 flows.txt])
+
+NETNS_DAEMONIZE([at_ns1],
+                [tcpdump -l -nn -xx -U -i p1 -w p1.pcap 2> tcpdump.err],
+                [tcpdump.pid])
+OVS_WAIT_UNTIL([grep "listening" tcpdump.err])
+
+dnl IPv4 Packet content:
+dnl   Ethernet II, Src: 36:b1:ee:7c:01:03, Dst: 36:b1:ee:7c:01:02
+dnl       Type: IPv4 (0x0800)
+dnl   Internet Protocol Version 4, Src: 10.1.1.1, Dst: 10.1.1.2
+dnl       0100 .... = Version: 4
+dnl       .... 0101 = Header Length: 20 bytes (5)
+dnl       Differentiated Services Field: 0x00 (DSCP: CS0, ECN: Not-ECT)
+dnl       Total Length: 38
+dnl       Identification: 0x0001 (1)
+dnl       001. .... = Flags: 0x1, More fragments
+dnl           0... .... = Reserved bit: Not set
+dnl           .0.. .... = Don't fragment: Not set
+dnl           ..1. .... = More fragments: Set
+dnl       ...0 0000 0000 0000 = Fragment Offset: 0
+dnl       Time to Live: 64
+dnl       Protocol: UDP (17)
+dnl       Header Checksum: 0x44c2
+dnl   Data (18 bytes)
+eth="36 b1 ee 7c 01 02 36 b1 ee 7c 01 03 08 00"
+ip="45 00 00 26 00 01 20 00 40 11 44 c2 0a 01 01 01 0a 01 01 02"
+data="0b c4 08 84 00 26 e9 64 01 02 03 04 05 06 07 08 09 0a"
+packet="${eth} ${ip} ${data}"
+
+dnl We send each packet multiple times, one for learning, which will go to
+dnl ovs-vswitchd, and the others will go through the actual datapath.
+for i in 1 2 3 4 5; do
+  NS_CHECK_EXEC([at_ns0],
+                [$PYTHON3 $srcdir/sendpkt.py p0 ${packet} > /dev/null])
+done
+
+dnl Update source address and checksums in original packet for comparison.
+packet=$(echo "$packet" | sed -e 's/ //g' \
+         -e 's/0a010101/0b010101/g' -e 's/44c2/43c2/g' -e 's/e964/e864/g')
+OVS_WAIT_UNTIL([test $(ovs-pcap p1.pcap | grep -c "${packet}") -eq 5])
+
+dnl Repeat similar test with IPv6.
+dnl Packet content:
+dnl   Ethernet II, Src: 36:b1:ee:7c:01:03, Dst: 36:b1:ee:7c:01:02
+dnl       Type: IPv6 (0x86dd)
+dnl   Internet Protocol Version 6, Src: fc00::1, Dst: fc00::2
+dnl       Payload Length: 24
+dnl       Next Header: Fragment Header for IPv6 (44)
+dnl       Hop Limit: 64
+dnl       Fragment Header for IPv6
+dnl           Next header: UDP (17)
+dnl           Reserved octet: 0x00
+dnl           0000 0000 0000 0... = Offset: 0 (0 bytes)
+dnl           .... .... .... .00. = Reserved bits: 0
+dnl           .... .... .... ...1 = More Fragments: Yes
+dnl           Identification: 0x2316ab36
+dnl   Data (16 bytes)
+eth="36 b1 ee 7c 01 02 36 b1 ee 7c 01 03 86 dd"
+ip="60 00 00 00 00 18 2c 40 fc 00 00 00 00 00 00 00 00 00 00 00 00 00 00 01 \
+    fc 00 00 00 00 00 00 00 00 00 00 00 00 00 00 02 11 00 00 01 23 16 ab 36"
+data="0b c4 08 84 00 26 07 65 01 02 03 04 05 06 07 08"
+packet="${eth} ${ip} ${data}"
+
+for i in 1 2 3 4 5; do
+  NS_CHECK_EXEC([at_ns0],
+                [$PYTHON3 $srcdir/sendpkt.py p0 ${packet} > /dev/null])
+done
+
+dnl Update checksum and source address in original packet for comparison.
+packet=$(echo "$packet" | sed -e 's/ //g' -e 's/0765/0666/g' -e \
+       's/fc000000000000000000000000000001/fc000000000000000000000000000100/g')
+OVS_WAIT_UNTIL([test $(ovs-pcap p1.pcap | grep -c "${packet}") -eq 5])
+
+OVS_TRAFFIC_VSWITCHD_STOP
+AT_CLEANUP
+
 AT_BANNER([MPLS])
 
 AT_SETUP([mpls - encap header dp-support])
-- 
2.46.0

_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to