Jiayu, could you please also help review this GSO patch? Thanks a lot.
At 2020-07-01 14:46:43, yang_y...@163.com wrote: >From: Yi Yang <yangy...@inspur.com> > >Many NICs can't offload VXLAN UFO, so it is very important >to do VXLAN UDP GSO by software to improve VM-to-VM UDP >performance, especially for the case that VM MTU is just >1500 but not 9000. > >With this enabled in DPDK, OVS DPDK can leverage it to >improve VM-to-VM UDP performance, performance gain is very >huge, over 2 times. > >Signed-off-by: Yi Yang <yangy...@inspur.com> >--- > lib/librte_gso/Makefile | 1 + > lib/librte_gso/gso_common.h | 5 ++ > lib/librte_gso/gso_tunnel_udp4.c | 108 +++++++++++++++++++++++++++++++++++++++ > lib/librte_gso/gso_tunnel_udp4.h | 43 ++++++++++++++++ > lib/librte_gso/meson.build | 2 +- > lib/librte_gso/rte_gso.c | 8 +++ > 6 files changed, 166 insertions(+), 1 deletion(-) > create mode 100644 lib/librte_gso/gso_tunnel_udp4.c > create mode 100644 lib/librte_gso/gso_tunnel_udp4.h > >diff --git a/lib/librte_gso/Makefile b/lib/librte_gso/Makefile >index a34846e..3005817 100644 >--- a/lib/librte_gso/Makefile >+++ b/lib/librte_gso/Makefile >@@ -17,6 +17,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_GSO) += rte_gso.c > SRCS-$(CONFIG_RTE_LIBRTE_GSO) += gso_common.c > SRCS-$(CONFIG_RTE_LIBRTE_GSO) += gso_tcp4.c > SRCS-$(CONFIG_RTE_LIBRTE_GSO) += gso_tunnel_tcp4.c >+SRCS-$(CONFIG_RTE_LIBRTE_GSO) += gso_tunnel_udp4.c > SRCS-$(CONFIG_RTE_LIBRTE_GSO) += gso_udp4.c > > # install this header file >diff --git a/lib/librte_gso/gso_common.h b/lib/librte_gso/gso_common.h >index a0b8343..4d5f303 100644 >--- a/lib/librte_gso/gso_common.h >+++ b/lib/librte_gso/gso_common.h >@@ -26,6 +26,11 @@ > (PKT_TX_TCP_SEG | PKT_TX_IPV4 | PKT_TX_OUTER_IPV4 | \ > PKT_TX_TUNNEL_VXLAN)) > >+#define IS_IPV4_VXLAN_UDP4(flag) (((flag) & (PKT_TX_UDP_SEG | PKT_TX_IPV4 | \ >+ PKT_TX_OUTER_IPV4 | PKT_TX_TUNNEL_MASK)) == \ >+ (PKT_TX_UDP_SEG | PKT_TX_IPV4 | PKT_TX_OUTER_IPV4 | \ >+ PKT_TX_TUNNEL_VXLAN)) >+ > #define IS_IPV4_GRE_TCP4(flag) (((flag) & (PKT_TX_TCP_SEG | PKT_TX_IPV4 | \ > PKT_TX_OUTER_IPV4 | 
PKT_TX_TUNNEL_MASK)) == \ > (PKT_TX_TCP_SEG | PKT_TX_IPV4 | PKT_TX_OUTER_IPV4 | \ >diff --git a/lib/librte_gso/gso_tunnel_udp4.c >b/lib/librte_gso/gso_tunnel_udp4.c >new file mode 100644 >index 0000000..1a018ee >--- /dev/null >+++ b/lib/librte_gso/gso_tunnel_udp4.c >@@ -0,0 +1,108 @@ >+/* SPDX-License-Identifier: BSD-3-Clause >+ * Copyright(c) 2020 Inspur Corporation >+ */ >+ >+#include "gso_common.h" >+#include "gso_tunnel_udp4.h" >+ >+#define IPV4_HDR_MF_BIT (1U << 13) >+ >+static void >+update_tunnel_ipv4_udp_headers(struct rte_mbuf *pkt, struct rte_mbuf **segs, >+ uint16_t nb_segs) >+{ >+ struct rte_ipv4_hdr *ipv4_hdr; >+ uint16_t outer_id, inner_id, tail_idx, i, length; >+ uint16_t outer_ipv4_offset, inner_ipv4_offset; >+ uint16_t udp_gre_offset, udp_offset; >+ uint8_t update_udp_hdr; >+ uint16_t frag_offset = 0, is_mf; >+ >+ outer_ipv4_offset = pkt->outer_l2_len; >+ udp_gre_offset = outer_ipv4_offset + pkt->outer_l3_len; >+ inner_ipv4_offset = udp_gre_offset + pkt->l2_len; >+ udp_offset = inner_ipv4_offset + pkt->l3_len; >+ >+ /* Outer IPv4 header. */ >+ ipv4_hdr = (struct rte_ipv4_hdr *)(rte_pktmbuf_mtod(pkt, char *) + >+ outer_ipv4_offset); >+ outer_id = rte_be_to_cpu_16(ipv4_hdr->packet_id); >+ >+ /* Inner IPv4 header. */ >+ ipv4_hdr = (struct rte_ipv4_hdr *)(rte_pktmbuf_mtod(pkt, char *) + >+ inner_ipv4_offset); >+ inner_id = rte_be_to_cpu_16(ipv4_hdr->packet_id); >+ >+ tail_idx = nb_segs - 1; >+ >+ /* Only update UDP header for VxLAN packets. */ >+ update_udp_hdr = (pkt->ol_flags & PKT_TX_TUNNEL_VXLAN) ? 1 : 0; >+ >+ for (i = 0; i < nb_segs; i++) { >+ update_ipv4_header(segs[i], outer_ipv4_offset, outer_id); >+ if (update_udp_hdr) >+ update_udp_header(segs[i], udp_gre_offset); >+ update_ipv4_header(segs[i], inner_ipv4_offset, inner_id); >+ update_udp_header(segs[i], udp_offset); >+ /* For the case inner packet is UDP, we must keep UDP >+ * datagram boundary, it must be handled as IP fragment. >+ * >+ * Set IP fragment offset for inner IP header. 
>+ */ >+ ipv4_hdr = (struct rte_ipv4_hdr *) >+ (rte_pktmbuf_mtod(segs[i], char *) + >+ inner_ipv4_offset); >+ is_mf = i < tail_idx ? IPV4_HDR_MF_BIT : 0; >+ ipv4_hdr->fragment_offset = >+ rte_cpu_to_be_16(frag_offset | is_mf); >+ length = segs[i]->pkt_len - inner_ipv4_offset - pkt->l3_len; >+ frag_offset += (length >> 3); >+ outer_id++; >+ } >+} >+ >+int >+gso_tunnel_udp4_segment(struct rte_mbuf *pkt, >+ uint16_t gso_size, >+ struct rte_mempool *direct_pool, >+ struct rte_mempool *indirect_pool, >+ struct rte_mbuf **pkts_out, >+ uint16_t nb_pkts_out) >+{ >+ struct rte_ipv4_hdr *inner_ipv4_hdr; >+ uint16_t pyld_unit_size, hdr_offset, frag_off; >+ int ret = 1; >+ >+ hdr_offset = pkt->outer_l2_len + pkt->outer_l3_len + pkt->l2_len; >+ inner_ipv4_hdr = (struct rte_ipv4_hdr *)(rte_pktmbuf_mtod(pkt, char *) + >+ hdr_offset); >+ /* >+ * Don't process the packet whose MF bit or offset in the inner >+ * IPv4 header are non-zero. >+ */ >+ frag_off = rte_be_to_cpu_16(inner_ipv4_hdr->fragment_offset); >+ if (unlikely(IS_FRAGMENTED(frag_off))) { >+ pkts_out[0] = pkt; >+ return 1; >+ } >+ >+ hdr_offset += pkt->l3_len; >+ /* Don't process the packet without data */ >+ if ((hdr_offset + pkt->l4_len) >= pkt->pkt_len) { >+ pkts_out[0] = pkt; >+ return 1; >+ } >+ >+ /* pyld_unit_size must be a multiple of 8 because frag_off >+ * uses 8 bytes as unit. 
>+ */ >+ pyld_unit_size = (gso_size - hdr_offset) & ~7U; >+ >+ /* Segment the payload */ >+ ret = gso_do_segment(pkt, hdr_offset, pyld_unit_size, direct_pool, >+ indirect_pool, pkts_out, nb_pkts_out); >+ if (ret > 1) >+ update_tunnel_ipv4_udp_headers(pkt, pkts_out, ret); >+ >+ return ret; >+} >diff --git a/lib/librte_gso/gso_tunnel_udp4.h >b/lib/librte_gso/gso_tunnel_udp4.h >new file mode 100644 >index 0000000..d56e342 >--- /dev/null >+++ b/lib/librte_gso/gso_tunnel_udp4.h >@@ -0,0 +1,43 @@ >+/* SPDX-License-Identifier: BSD-3-Clause >+ * Copyright(c) 2020 Inspur Corporation >+ */ >+ >+#ifndef _GSO_TUNNEL_UDP4_H_ >+#define _GSO_TUNNEL_UDP4_H_ >+ >+#include <stdint.h> >+#include <rte_mbuf.h> >+ >+/** >+ * Segment a tunneling packet with inner TCP/IPv4 headers. This function >+ * does not check if the input packet has correct checksums, and does not >+ * update checksums for output GSO segments. Furthermore, it does not >+ * process IP fragment packets. >+ * >+ * @param pkt >+ * The packet mbuf to segment. >+ * @param gso_size >+ * The max length of a GSO segment, measured in bytes. >+ * @param direct_pool >+ * MBUF pool used for allocating direct buffers for output segments. >+ * @param indirect_pool >+ * MBUF pool used for allocating indirect buffers for output segments. >+ * @param pkts_out >+ * Pointer array used to store the MBUF addresses of output GSO >+ * segments, when it succeeds. If the memory space in pkts_out is >+ * insufficient, it fails and returns -EINVAL. >+ * @param nb_pkts_out >+ * The max number of items that 'pkts_out' can keep. >+ * >+ * @return >+ * - The number of GSO segments filled in pkts_out on success. >+ * - Return -ENOMEM if run out of memory in MBUF pools. >+ * - Return -EINVAL for invalid parameters. 
>+ */ >+int gso_tunnel_udp4_segment(struct rte_mbuf *pkt, >+ uint16_t gso_size, >+ struct rte_mempool *direct_pool, >+ struct rte_mempool *indirect_pool, >+ struct rte_mbuf **pkts_out, >+ uint16_t nb_pkts_out); >+#endif >diff --git a/lib/librte_gso/meson.build b/lib/librte_gso/meson.build >index ad8dd85..05904f2 100644 >--- a/lib/librte_gso/meson.build >+++ b/lib/librte_gso/meson.build >@@ -2,6 +2,6 @@ > # Copyright(c) 2017 Intel Corporation > > sources = files('gso_common.c', 'gso_tcp4.c', 'gso_udp4.c', >- 'gso_tunnel_tcp4.c', 'rte_gso.c') >+ 'gso_tunnel_tcp4.c', 'gso_tunnel_udp4.c', 'rte_gso.c') > headers = files('rte_gso.h') > deps += ['ethdev'] >diff --git a/lib/librte_gso/rte_gso.c b/lib/librte_gso/rte_gso.c >index 751b5b6..cf401b2 100644 >--- a/lib/librte_gso/rte_gso.c >+++ b/lib/librte_gso/rte_gso.c >@@ -11,6 +11,7 @@ > #include "gso_common.h" > #include "gso_tcp4.h" > #include "gso_tunnel_tcp4.h" >+#include "gso_tunnel_udp4.h" > #include "gso_udp4.h" > > #define ILLEGAL_UDP_GSO_CTX(ctx) \ >@@ -62,6 +63,13 @@ > ret = gso_tunnel_tcp4_segment(pkt, gso_size, ipid_delta, > direct_pool, indirect_pool, > pkts_out, nb_pkts_out); >+ } else if (IS_IPV4_VXLAN_UDP4(pkt->ol_flags) && >+ (gso_ctx->gso_types & (DEV_TX_OFFLOAD_VXLAN_TNL_TSO | >+ DEV_TX_OFFLOAD_UDP_TSO))) { >+ pkt->ol_flags &= (~PKT_TX_UDP_SEG); >+ ret = gso_tunnel_udp4_segment(pkt, gso_size, >+ direct_pool, indirect_pool, >+ pkts_out, nb_pkts_out); > } else if (IS_IPV4_TCP(pkt->ol_flags) && > (gso_ctx->gso_types & DEV_TX_OFFLOAD_TCP_TSO)) { > pkt->ol_flags &= (~PKT_TX_TCP_SEG); >-- >1.8.3.1