William, which kernel version did you use to test for this patch? I don't want to build a kernel if Ubuntu 16.04 kernel can work.
-----邮件原件----- 发件人: dev [mailto:ovs-dev-boun...@openvswitch.org] 代表 William Tu 发送时间: 2020年2月21日 3:00 收件人: d...@openvswitch.org 抄送: f...@sysclose.org; i.maxim...@ovn.org 主题: [ovs-dev] [PATCHv2 1/2] userspace: Enable TSO support for non-DPDK. This patch enables TSO support for non-DPDK use cases, and also add check-system-tso testsuite. Before TSO, we have to disable checksum offload, allowing the kernel to calculate the TCP/UDP packet checsum. With TSO, we can skip the checksum validation by enabling checksum offload, and with large packet size, we see better performance. Consider container to container use cases: iperf3 -c (ns0) -> veth peer -> OVS -> veth peer -> iperf3 -s (ns1) And I got around 6Gbps, similar to TSO with DPDK-enabled. Tested-at: https://travis-ci.org/williamtu/ovs-travis/builds/653109097 Signed-off-by: William Tu <u9012...@gmail.com> --- v2: - add make check-system-tso test - combine logging for dpdk and non-dpdk - I'm surprised that most of the test cases passed. This is due to few tests using tcp/udp, so it does not trigger TSO. I saw only geneve/vxlan fails randomly, maybe we can check it later. --- lib/dp-packet.h | 95 ++++++++++++++++++++++++++----------------- lib/userspace-tso.c | 5 --- tests/.gitignore | 3 ++ tests/automake.mk | 15 +++++++ tests/system-tso-macros.at | 42 +++++++++++++++++++ tests/system-tso-testsuite.at | 26 ++++++++++++ 6 files changed, 143 insertions(+), 43 deletions(-) create mode 100644 tests/system-tso-macros.at create mode 100644 tests/system-tso-testsuite.at diff --git a/lib/dp-packet.h b/lib/dp-packet.h index 9f8991faad52..6b90cec2afb4 100644 --- a/lib/dp-packet.h +++ b/lib/dp-packet.h @@ -53,7 +53,25 @@ enum OVS_PACKED_ENUM dp_packet_source { enum dp_packet_offload_mask { DP_PACKET_OL_RSS_HASH_MASK = 0x1, /* Is the 'rss_hash' valid? */ DP_PACKET_OL_FLOW_MARK_MASK = 0x2, /* Is the 'flow_mark' valid? */ + DP_PACKET_OL_RX_L4_CKSUM_BAD = 1 << 3, + DP_PACKET_OL_RX_IP_CKSUM_BAD = 1 << 4, + DP_PACKET_OL_RX_L4_CKSUM_GOOD = 1 << 5, + DP_PACKET_OL_RX_IP_CKSUM_GOOD = 1 << 6, + DP_PACKET_OL_TX_TCP_SEG = 1 << 7, + DP_PACKET_OL_TX_IPV4 = 1 << 8, + DP_PACKET_OL_TX_IPV6 = 1 << 9, + DP_PACKET_OL_TX_TCP_CKSUM = 1 << 10, + DP_PACKET_OL_TX_UDP_CKSUM = 1 << 11, + DP_PACKET_OL_TX_SCTP_CKSUM = 1 << 12, }; + +#define DP_PACKET_OL_TX_L4_MASK (DP_PACKET_OL_TX_TCP_CKSUM | \ + DP_PACKET_OL_TX_UDP_CKSUM | \ + DP_PACKET_OL_TX_SCTP_CKSUM) #define +DP_PACKET_OL_RX_IP_CKSUM_MASK (DP_PACKET_OL_RX_IP_CKSUM_GOOD | \ + DP_PACKET_OL_RX_IP_CKSUM_BAD) +#define DP_PACKET_OL_RX_L4_CKSUM_MASK (DP_PACKET_OL_RX_L4_CKSUM_GOOD | \ + DP_PACKET_OL_RX_L4_CKSUM_BAD) #else /* DPDK mbuf ol_flags that are not really an offload flags. These are mostly * related to mbuf memory layout and OVS should not touch/clear them. */ @@ -739,82 +757,79 @@ dp_packet_set_allocated(struct dp_packet *b, uint16_t s) b->allocated_ = s; } -/* There are no implementation when not DPDK enabled datapath. */ static inline bool -dp_packet_hwol_is_tso(const struct dp_packet *b OVS_UNUSED) +dp_packet_hwol_is_tso(const struct dp_packet *b) { - return false; + return !!(b->ol_flags & DP_PACKET_OL_TX_TCP_SEG); } -/* There are no implementation when not DPDK enabled datapath. */ static inline bool -dp_packet_hwol_is_ipv4(const struct dp_packet *b OVS_UNUSED) +dp_packet_hwol_is_ipv4(const struct dp_packet *b) { - return false; + return !!(b->ol_flags & DP_PACKET_OL_TX_IPV4); } -/* There are no implementation when not DPDK enabled datapath. */ static inline uint64_t -dp_packet_hwol_l4_mask(const struct dp_packet *b OVS_UNUSED) +dp_packet_hwol_l4_mask(const struct dp_packet *b) { - return 0; + return b->ol_flags & DP_PACKET_OL_TX_L4_MASK; } -/* There are no implementation when not DPDK enabled datapath. */ static inline bool -dp_packet_hwol_l4_is_tcp(const struct dp_packet *b OVS_UNUSED) +dp_packet_hwol_l4_is_tcp(const struct dp_packet *b) { - return false; + return (b->ol_flags & DP_PACKET_OL_TX_L4_MASK) == + DP_PACKET_OL_TX_TCP_CKSUM; } -/* There are no implementation when not DPDK enabled datapath. */ static inline bool -dp_packet_hwol_l4_is_udp(const struct dp_packet *b OVS_UNUSED) +dp_packet_hwol_l4_is_udp(const struct dp_packet *b) { - return false; + return (b->ol_flags & DP_PACKET_OL_TX_L4_MASK) == + DP_PACKET_OL_TX_UDP_CKSUM; } -/* There are no implementation when not DPDK enabled datapath. */ static inline bool -dp_packet_hwol_l4_is_sctp(const struct dp_packet *b OVS_UNUSED) +dp_packet_hwol_l4_is_sctp(const struct dp_packet *b) { - return false; + return (b->ol_flags & DP_PACKET_OL_TX_L4_MASK) == + DP_PACKET_OL_TX_SCTP_CKSUM; } -/* There are no implementation when not DPDK enabled datapath. */ static inline void -dp_packet_hwol_set_tx_ipv4(struct dp_packet *b OVS_UNUSED) +dp_packet_hwol_set_tx_ipv4(struct dp_packet *b) { + b->ol_flags |= DP_PACKET_OL_TX_IPV4; } -/* There are no implementation when not DPDK enabled datapath. */ static inline void -dp_packet_hwol_set_tx_ipv6(struct dp_packet *b OVS_UNUSED) +dp_packet_hwol_set_tx_ipv6(struct dp_packet *b) { + b->ol_flags |= DP_PACKET_OL_TX_IPV6; } -/* There are no implementation when not DPDK enabled datapath. */ static inline void -dp_packet_hwol_set_csum_tcp(struct dp_packet *b OVS_UNUSED) +dp_packet_hwol_set_csum_tcp(struct dp_packet *b) { + b->ol_flags |= DP_PACKET_OL_TX_TCP_CKSUM; } -/* There are no implementation when not DPDK enabled datapath. */ static inline void -dp_packet_hwol_set_csum_udp(struct dp_packet *b OVS_UNUSED) +dp_packet_hwol_set_csum_udp(struct dp_packet *b) { + b->ol_flags |= DP_PACKET_OL_TX_UDP_CKSUM; } -/* There are no implementation when not DPDK enabled datapath. */ static inline void -dp_packet_hwol_set_csum_sctp(struct dp_packet *b OVS_UNUSED) +dp_packet_hwol_set_csum_sctp(struct dp_packet *b) { + b->ol_flags |= DP_PACKET_OL_TX_SCTP_CKSUM; } -/* There are no implementation when not DPDK enabled datapath. */ static inline void -dp_packet_hwol_set_tcp_seg(struct dp_packet *b OVS_UNUSED) +dp_packet_hwol_set_tcp_seg(struct dp_packet *b) { + b->ol_flags |= DP_PACKET_OL_TX_TCP_SEG; } /* Returns the RSS hash of the packet 'p'. Note that the returned value is @@ -845,27 +860,31 @@ dp_packet_reset_offload(struct dp_packet *p) } static inline bool -dp_packet_ip_checksum_valid(const struct dp_packet *p OVS_UNUSED) +dp_packet_ip_checksum_valid(const struct dp_packet *p) { - return false; + return (p->ol_flags & DP_PACKET_OL_RX_IP_CKSUM_MASK) == + DP_PACKET_OL_RX_IP_CKSUM_GOOD; } static inline bool -dp_packet_ip_checksum_bad(const struct dp_packet *p OVS_UNUSED) +dp_packet_ip_checksum_bad(const struct dp_packet *p) { - return false; + return (p->ol_flags & DP_PACKET_OL_RX_IP_CKSUM_MASK) == + DP_PACKET_OL_RX_IP_CKSUM_BAD; } static inline bool -dp_packet_l4_checksum_valid(const struct dp_packet *p OVS_UNUSED) +dp_packet_l4_checksum_valid(const struct dp_packet *p) { - return false; + return (p->ol_flags & DP_PACKET_OL_RX_L4_CKSUM_MASK) == + DP_PACKET_OL_RX_L4_CKSUM_GOOD; } static inline bool -dp_packet_l4_checksum_bad(const struct dp_packet *p OVS_UNUSED) +dp_packet_l4_checksum_bad(const struct dp_packet *p) { - return false; + return (p->ol_flags & DP_PACKET_OL_RX_L4_CKSUM_MASK) == + DP_PACKET_OL_RX_L4_CKSUM_BAD; } static inline bool diff --git a/lib/userspace-tso.c b/lib/userspace-tso.c index 6a4a0149b7f5..f843c2a763ce 100644 --- a/lib/userspace-tso.c +++ b/lib/userspace-tso.c @@ -34,13 +34,8 @@ userspace_tso_init(const struct smap *ovs_other_config) static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; if (ovsthread_once_start(&once)) { -#ifdef DPDK_NETDEV VLOG_INFO("Userspace TCP Segmentation Offloading support enabled"); userspace_tso = true; -#else - VLOG_WARN("Userspace TCP Segmentation Offloading can not be enabled" - "since OVS is built without DPDK support."); -#endif ovsthread_once_done(&once); } } diff --git a/tests/.gitignore b/tests/.gitignore index c5abb32d025a..d6591c3f5f8f 100644 --- a/tests/.gitignore +++ b/tests/.gitignore @@ -25,6 +25,9 @@ /system-userspace-testsuite /system-userspace-testsuite.dir/ /system-userspace-testsuite.log +/system-tso-testsuite +/system-tso-testsuite.dir/ +/system-tso-testsuite.log /system-offloads-testsuite /system-offloads-testsuite.dir/ /system-offloads-testsuite.log diff --git a/tests/automake.mk b/tests/automake.mk index 9c7ebdce9bf0..b8ddc069417e 100644 --- a/tests/automake.mk +++ b/tests/automake.mk @@ -4,6 +4,7 @@ EXTRA_DIST += \ $(SYSTEM_TESTSUITE_AT) \ $(SYSTEM_KMOD_TESTSUITE_AT) \ $(SYSTEM_USERSPACE_TESTSUITE_AT) \ + $(SYSTEM_TSO_TESTSUITE_AT) \ $(SYSTEM_AFXDP_TESTSUITE_AT) \ $(SYSTEM_OFFLOADS_TESTSUITE_AT) \ $(SYSTEM_DPDK_TESTSUITE_AT) \ @@ -11,6 +12,7 @@ EXTRA_DIST += \ $(TESTSUITE) \ $(SYSTEM_KMOD_TESTSUITE) \ $(SYSTEM_USERSPACE_TESTSUITE) \ + $(SYSTEM_TSO_TESTSUITE) \ $(SYSTEM_AFXDP_TESTSUITE) \ $(SYSTEM_OFFLOADS_TESTSUITE) \ $(SYSTEM_DPDK_TESTSUITE) \ @@ -154,6 +156,10 @@ SYSTEM_USERSPACE_TESTSUITE_AT = \ tests/system-userspace-macros.at \ tests/system-userspace-packet-type-aware.at +SYSTEM_TSO_TESTSUITE_AT = \ + tests/system-tso-testsuite.at \ + tests/system-tso-macros.at + SYSTEM_AFXDP_TESTSUITE_AT = \ tests/system-userspace-macros.at \ tests/system-afxdp-testsuite.at \ @@ -183,6 +189,7 @@ TESTSUITE = $(srcdir)/tests/testsuite TESTSUITE_PATCH = $(srcdir)/tests/testsuite.patch SYSTEM_KMOD_TESTSUITE = $(srcdir)/tests/system-kmod-testsuite SYSTEM_USERSPACE_TESTSUITE = $(srcdir)/tests/system-userspace-testsuite +SYSTEM_TSO_TESTSUITE = $(srcdir)/tests/system-tso-testsuite SYSTEM_AFXDP_TESTSUITE = $(srcdir)/tests/system-afxdp-testsuite SYSTEM_OFFLOADS_TESTSUITE = $(srcdir)/tests/system-offloads-testsuite SYSTEM_DPDK_TESTSUITE = $(srcdir)/tests/system-dpdk-testsuite @@ -326,6 +333,10 @@ check-system-userspace: all set $(SHELL) '$(SYSTEM_USERSPACE_TESTSUITE)' -C tests AUTOTEST_PATH='$(AUTOTEST_PATH)'; \ "$$@" $(TESTSUITEFLAGS) -j1 || (test X'$(RECHECK)' = Xyes && "$$@" --recheck) +check-system-tso: all + set $(SHELL) '$(SYSTEM_TSO_TESTSUITE)' -C tests AUTOTEST_PATH='$(AUTOTEST_PATH)'; \ + "$$@" $(TESTSUITEFLAGS) -j1 || (test X'$(RECHECK)' = Xyes && "$$@" +--recheck) + check-afxdp: all set $(SHELL) '$(SYSTEM_AFXDP_TESTSUITE)' -C tests AUTOTEST_PATH='$(AUTOTEST_PATH)' $(TESTSUITEFLAGS) -j1; \ "$$@" || (test X'$(RECHECK)' = Xyes && "$$@" --recheck) @@ -367,6 +378,10 @@ $(SYSTEM_USERSPACE_TESTSUITE): package.m4 $(SYSTEM_TESTSUITE_AT) $(SYSTEM_USERSP $(AM_V_GEN)$(AUTOTEST) -I '$(srcdir)' -o $@.tmp $@.at $(AM_V_at)mv $@.tmp $@ +$(SYSTEM_TSO_TESTSUITE): package.m4 $(SYSTEM_TESTSUITE_AT) $(SYSTEM_TSO_TESTSUITE_AT) $(COMMON_MACROS_AT) + $(AM_V_GEN)$(AUTOTEST) -I '$(srcdir)' -o $@.tmp $@.at + $(AM_V_at)mv $@.tmp $@ + $(SYSTEM_AFXDP_TESTSUITE): package.m4 $(SYSTEM_TESTSUITE_AT) $(SYSTEM_AFXDP_TESTSUITE_AT) $(COMMON_MACROS_AT) $(AM_V_GEN)$(AUTOTEST) -I '$(srcdir)' -o $@.tmp $@.at $(AM_V_at)mv $@.tmp $@ diff --git a/tests/system-tso-macros.at b/tests/system-tso-macros.at new file mode 100644 index 000000000000..b2dcc85440b4 --- /dev/null +++ b/tests/system-tso-macros.at @@ -0,0 +1,42 @@ +# _ADD_BR([name]) +# +# Expands into the proper ovs-vsctl commands to create a bridge with +the # appropriate type and properties m4_define([_ADD_BR], [[add-br $1 +-- set Bridge $1 datapath_type="netdev" +protocols=OpenFlow10,OpenFlow11,OpenFlow12,OpenFlow13,OpenFlow14,OpenFl +ow15 fail-mode=secure ]]) + +# OVS_TRAFFIC_VSWITCHD_START([vsctl-args], [vsctl-output], [=override]) +# # Creates a database and starts ovsdb-server, starts ovs-vswitchd # +connected to that database, calls ovs-vsctl to create a bridge named # +br0 with predictable settings, passing 'vsctl-args' as additional # +commands to ovs-vsctl. If 'vsctl-args' causes ovs-vsctl to provide # +output (e.g. because it includes "create" commands) then 'vsctl-output' +# specifies the expected output after filtering through uuidfilt. +m4_define([OVS_TRAFFIC_VSWITCHD_START], + [ + OVS_WAIT_WHILE([ip link show ovs-netdev]) + _OVS_VSWITCHD_START([--disable-system]) + dnl Add bridges, ports, etc. + OVS_WAIT_WHILE([ip link show br0]) + AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:userspace-tso-enable=true]) + AT_CHECK([ovs-vsctl -- _ADD_BR([br0]) -- $1 m4_if([$2], [], [], [| +uuidfilt])], [0], [$2]) +]) + +# CONFIGURE_VETH_OFFLOADS([VETH]) +# +# Disable TX offloads for veths. The userspace datapath uses the +AF_PACKET # socket to receive packets for veths. Unfortunately, the +AF_PACKET socket # doesn't play well with offloads: +# 1. GSO packets are received without segmentation and therefore discarded. +# 2. Packets with offloaded partial checksum are received with the wrong +# checksum, therefore discarded by the receiver. +# +# By disabling tx offloads in the non-OVS side of the veth peer we make +sure # that the AF_PACKET socket will not receive bad packets. +# +# This is a workaround, and should be removed when offloads are +properly # supported in netdev-linux. +m4_define([CONFIGURE_VETH_OFFLOADS], + [AT_CHECK([ethtool -K $1 sg on], [0], [ignore], [ignore])] + [AT_CHECK([ethtool -K $1 tso on], [0], [ignore], [ignore])] +) diff --git a/tests/system-tso-testsuite.at b/tests/system-tso-testsuite.at new file mode 100644 index 000000000000..99d748006a86 --- /dev/null +++ b/tests/system-tso-testsuite.at @@ -0,0 +1,26 @@ +AT_INIT + +AT_COPYRIGHT([Copyright (c) 2020 VMware, Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); you may +not use this file except in compliance with the License. +You may obtain a copy of the License at: + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License.]) + +m4_ifdef([AT_COLOR_TESTS], [AT_COLOR_TESTS]) + +m4_include([tests/ovs-macros.at]) +m4_include([tests/ovsdb-macros.at]) +m4_include([tests/ofproto-macros.at]) +m4_include([tests/system-common-macros.at]) +m4_include([tests/system-userspace-macros.at]) +m4_include([tests/system-tso-macros.at]) + +m4_include([tests/system-traffic.at]) -- 2.7.4 _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev