Currently, when 'userspace-tso-enable=true' is set, tunnel test cases fail because of incorrect checksums in both the inner header and the outer header. This patch recalculates the checksum before a packet is output to a port (tunnel or tap), ensuring that the receiver sees a correct checksum.
Consider the following cases: 1) veth -> ovs -> veth, and 2) tap -> ovs -> tap: there is no need to recalculate the checksum, because the vnet header carries the offload information. 3) decap: vxlan tunnel -> br-underlay -> br-overlay: the inner packet is sent to br-overlay (which is a tap), so the inner header's checksum needs to be fixed. 4) encap: br-overlay -> br-underlay -> vxlan tunnel: fix the inner checksum before pushing the outer header. I added iperf to the tests, and the vxlan and geneve tests pass: $ make check-system-tso TESTSUITEFLAGS="-k vxlan" $ make check-system-tso TESTSUITEFLAGS="-k geneve" While TCP now works over the tunnel, a TCP sender that sends very large packets will still fail. I still have to segment the inner TCP packet before pushing the outer tunnel header. Signed-off-by: William Tu <u9012...@gmail.com> --- lib/netdev-linux.c | 2 +- lib/netdev-native-tnl.c | 11 ++++++++++- lib/netdev.c | 18 ++++++------------ lib/packets.c | 34 ++++++++++++++++++++++++++++++++++ lib/packets.h | 1 + tests/system-tap.at | 3 +++ tests/system-traffic.at | 9 +++++++++ 7 files changed, 64 insertions(+), 14 deletions(-) diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c index 6be23dbeed57..bb365b3b0da3 100644 --- a/lib/netdev-linux.c +++ b/lib/netdev-linux.c @@ -1446,7 +1446,6 @@ netdev_linux_batch_rxq_recv_tap(struct netdev_rxq_linux *rx, int mtu, netdev_get_name(netdev_)); continue; } - dp_packet_batch_add(batch, pkt); } @@ -1604,6 +1603,7 @@ netdev_linux_tap_batch_send(struct netdev *netdev_, bool tso, int mtu, int error; if (tso) { + packet_csum_tcpudp(packet); netdev_linux_prepend_vnet_hdr(packet, mtu); } diff --git a/lib/netdev-native-tnl.c b/lib/netdev-native-tnl.c index b89dfdd52a86..003c78a151f8 100644 --- a/lib/netdev-native-tnl.c +++ b/lib/netdev-native-tnl.c @@ -43,6 +43,7 @@ #include "seq.h" #include "unaligned.h" #include "unixctl.h" +#include "userspace-tso.h" #include "openvswitch/vlog.h" VLOG_DEFINE_THIS_MODULE(native_tnl); @@ -153,6 +154,12 @@ netdev_tnl_push_ip_header(struct dp_packet *packet, struct ip_header *ip; struct 
ovs_16aligned_ip6_hdr *ip6; + if (userspace_tso_enabled()) { + /* Calculate inner header's checksum before pushing outer header. + * (Assume the device does not support tnl checksum) */ + packet_csum_tcpudp(packet); + } + eth = dp_packet_push_uninit(packet, size); *ip_tot_size = dp_packet_size(packet) - sizeof (struct eth_header); @@ -189,7 +196,9 @@ udp_extract_tnl_md(struct dp_packet *packet, struct flow_tnl *tnl, return NULL; } - if (udp->udp_csum) { + /* 'udp->udp_csum' will be the pseudo header csum when when userspace + * TSO is enabled. Skip the validation. */ + if (udp->udp_csum && !userspace_tso_enabled()) { if (OVS_UNLIKELY(!dp_packet_l4_checksum_valid(packet))) { uint32_t csum; if (netdev_tnl_is_header_ipv6(dp_packet_data(packet))) { diff --git a/lib/netdev.c b/lib/netdev.c index 91e91955c09b..bdf0000c45e9 100644 --- a/lib/netdev.c +++ b/lib/netdev.c @@ -960,18 +960,12 @@ netdev_push_header(const struct netdev *netdev, size_t i, size = dp_packet_batch_size(batch); DP_PACKET_BATCH_REFILL_FOR_EACH (i, size, packet, batch) { - if (OVS_UNLIKELY(dp_packet_hwol_is_tso(packet) - || dp_packet_hwol_l4_mask(packet))) { - COVERAGE_INC(netdev_push_header_drops); - dp_packet_delete(packet); - VLOG_WARN_RL(&rl, "%s: Tunneling packets with HW offload flags is " - "not supported: packet dropped", - netdev_get_name(netdev)); - } else { - netdev->netdev_class->push_header(netdev, packet, data); - pkt_metadata_init(&packet->md, data->out_port); - dp_packet_batch_refill(batch, packet, i); - } + /* Tunneling packet with HW offload flags is not supported. 
*/ + *dp_packet_ol_flags_ptr(packet) = 0; + + netdev->netdev_class->push_header(netdev, packet, data); + pkt_metadata_init(&packet->md, data->out_port); + dp_packet_batch_refill(batch, packet, i); } return 0; diff --git a/lib/packets.c b/lib/packets.c index 4a7643c5dd3a..b0bb283acdfa 100644 --- a/lib/packets.c +++ b/lib/packets.c @@ -1887,3 +1887,37 @@ IP_ECN_set_ce(struct dp_packet *pkt, bool is_ipv6) } } } + +void +packet_csum_tcpudp(struct dp_packet *p) +{ + struct eth_header *eth; + struct ip_header *ip; + struct tcp_header *tcp; + struct udp_header *udp; + uint32_t pseudo_hdr_csum; + uint8_t l4proto; + size_t l4_size; + + eth = dp_packet_eth(p); + if (eth->eth_type != htons(ETH_TYPE_IP)) { + return; + } + + ip = dp_packet_l3(p); + l4proto = ip->ip_proto; + l4_size = dp_packet_l4_size(p); + + if (l4proto == IPPROTO_TCP) { + pseudo_hdr_csum = packet_csum_pseudoheader(ip); + tcp = dp_packet_l4(p); + tcp->tcp_csum = 0; + tcp->tcp_csum = csum_finish(csum_continue(pseudo_hdr_csum, tcp, l4_size)); + + } else if (l4proto == IPPROTO_UDP) { + pseudo_hdr_csum = packet_csum_pseudoheader(ip); + udp = dp_packet_l4(p); + udp->udp_csum = 0; + udp->udp_csum = csum_finish(csum_continue(pseudo_hdr_csum, udp, l4_size)); + } +} diff --git a/lib/packets.h b/lib/packets.h index 481bc22fa1fe..1bea8c504811 100644 --- a/lib/packets.h +++ b/lib/packets.h @@ -1634,6 +1634,7 @@ void packet_put_ra_prefix_opt(struct dp_packet *, ovs_be32 preferred_lifetime, const ovs_be128 router_prefix); uint32_t packet_csum_pseudoheader(const struct ip_header *); +void packet_csum_tcpudp(struct dp_packet *p); void IP_ECN_set_ce(struct dp_packet *pkt, bool is_ipv6); #define DNS_HEADER_LEN 12 diff --git a/tests/system-tap.at b/tests/system-tap.at index 871a3bda4fcc..be108c59b3c9 100644 --- a/tests/system-tap.at +++ b/tests/system-tap.at @@ -29,6 +29,9 @@ NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 10.1.1.2 | FORMAT_PING], [0], OVS_START_L7([at_ns1], [http]) NS_CHECK_EXEC([at_ns0], [wget 10.1.1.2 -t 3 
-T 1 --retry-connrefused -v -o wget0.log]) +NETNS_DAEMONIZE([at_ns0], [iperf -s], [iperf.pid]) +NS_CHECK_EXEC([at_ns1], [iperf -c 10.1.1.1 -t1 1> /dev/null], [0]) + OVS_TRAFFIC_VSWITCHD_STOP(["/.*ethtool command ETHTOOL_G.*/d"]) AT_CLEANUP diff --git a/tests/system-traffic.at b/tests/system-traffic.at index fb5b9a36d283..ed014953ca4e 100644 --- a/tests/system-traffic.at +++ b/tests/system-traffic.at @@ -248,6 +248,7 @@ dnl Okay, now check the overlay with different packet sizes NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 10.1.1.100 | FORMAT_PING], [0], [dnl 3 packets transmitted, 3 received, 0% packet loss, time 0ms ]) + NS_CHECK_EXEC([at_ns0], [ping -s 1600 -q -c 3 -i 0.3 -w 2 10.1.1.100 | FORMAT_PING], [0], [dnl 3 packets transmitted, 3 received, 0% packet loss, time 0ms ]) @@ -255,6 +256,10 @@ NS_CHECK_EXEC([at_ns0], [ping -s 3200 -q -c 3 -i 0.3 -w 2 10.1.1.100 | FORMAT_PI 3 packets transmitted, 3 received, 0% packet loss, time 0ms ]) +NETNS_DAEMONIZE([at_ns0], [iperf -s], [iperf.pid]) +AT_CHECK([ethtool -K br0 tso off &> /dev/null], [0]) +AT_CHECK([iperf -c 10.1.1.1 -t1 1> /dev/null], [0]) + OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP @@ -571,6 +576,10 @@ NS_CHECK_EXEC([at_ns0], [ping -s 3200 -q -c 3 -i 0.3 -w 2 10.1.1.100 | FORMAT_PI 3 packets transmitted, 3 received, 0% packet loss, time 0ms ]) +NETNS_DAEMONIZE([at_ns0], [iperf -s], [iperf.pid]) +AT_CHECK([ethtool -K br0 tso off &> /dev/null], [0]) +AT_CHECK([iperf -c 10.1.1.1 -t1 1> /dev/null], [0]) + OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP -- 2.7.4 _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev