On Thu, Oct 26, 2023 at 3:40 AM Dexia Li via dev <ovs-dev@openvswitch.org> wrote: > > For the userspace datapath, this patch provides vxlan and geneve tunnel TSO. > Only userspace vxlan and geneve tunnels are supported, along with > tunnel outer and inner csum offload. If a netdev does not support the offload > features, there is a software fallback. If a netdev does not support vxlan > and geneve TSO, packets will be dropped. Front-end devices can also disable offload > features via ethtool. > > Signed-off-by: Dexia Li <dexia...@jaguarmicro.com> > --- > lib/dp-packet.c | 41 +++++++- > lib/dp-packet.h | 216 ++++++++++++++++++++++++++++++++++++---- > lib/dpif-netdev.c | 4 +- > lib/flow.c | 2 +- > lib/netdev-dpdk.c | 88 ++++++++++++++-- > lib/netdev-dummy.c | 2 +- > lib/netdev-native-tnl.c | 106 ++++++++++++++++++-- > lib/netdev-provider.h | 4 + > lib/netdev.c | 35 +++++-- > lib/packets.c | 12 +-- > lib/packets.h | 6 +- > tests/dpif-netdev.at | 4 +- > 12 files changed, 461 insertions(+), 59 deletions(-) > > diff --git a/lib/dp-packet.c b/lib/dp-packet.c > index ed004c3b9..b5013da9f 100644 > --- a/lib/dp-packet.c > +++ b/lib/dp-packet.c > @@ -543,16 +543,47 @@ dp_packet_compare_offsets(struct dp_packet *b1, struct > dp_packet *b2, > return true; > } > > +void > +dp_packet_tnl_outer_ol_send_prepare(struct dp_packet *p, > + uint64_t flags) > +{ > + if (dp_packet_hwol_is_outer_ipv4_cksum(p)) { > + if (!(flags & NETDEV_TX_OFFLOAD_OUTER_IP_CKSUM)) { > + dp_packet_ip_set_header_csum(p, false); > + dp_packet_ol_set_ip_csum_good(p); > + dp_packet_hwol_reset_outer_ipv4_csum(p); > + } > + } > + > + if (!dp_packet_hwol_is_outer_UDP_cksum(p)) { > + return; > + } > + > + if (!(flags & NETDEV_TX_OFFLOAD_OUTER_UDP_CKSUM)) { > + packet_udp_complete_csum(p, false); > + dp_packet_ol_set_l4_csum_good(p); > + dp_packet_hwol_reset_outer_udp_csum(p); > + } > +} > + > /* Checks if the packet 'p' is compatible with netdev_ol_flags 'flags' > * and if not, updates the packet with the software fall back. 
*/ > void > dp_packet_ol_send_prepare(struct dp_packet *p, uint64_t flags) > { > + bool tnl_inner = false; > + > + if (dp_packet_hwol_is_tunnel_geneve(p) || > + dp_packet_hwol_is_tunnel_vxlan(p)) { > + dp_packet_tnl_outer_ol_send_prepare(p, flags); > + tnl_inner = true; > + } > + > if (dp_packet_hwol_tx_ip_csum(p)) { > if (dp_packet_ip_checksum_good(p)) { > dp_packet_hwol_reset_tx_ip_csum(p); > } else if (!(flags & NETDEV_TX_OFFLOAD_IPV4_CKSUM)) { > - dp_packet_ip_set_header_csum(p); > + dp_packet_ip_set_header_csum(p, tnl_inner); > dp_packet_ol_set_ip_csum_good(p); > dp_packet_hwol_reset_tx_ip_csum(p); > } > @@ -562,24 +593,24 @@ dp_packet_ol_send_prepare(struct dp_packet *p, uint64_t > flags) > return; > } > > - if (dp_packet_l4_checksum_good(p)) { > + if (dp_packet_l4_checksum_good(p) && (!tnl_inner)) { > dp_packet_hwol_reset_tx_l4_csum(p); > return; > } > > if (dp_packet_hwol_l4_is_tcp(p) > && !(flags & NETDEV_TX_OFFLOAD_TCP_CKSUM)) { > - packet_tcp_complete_csum(p); > + packet_tcp_complete_csum(p, tnl_inner); > dp_packet_ol_set_l4_csum_good(p); > dp_packet_hwol_reset_tx_l4_csum(p); > } else if (dp_packet_hwol_l4_is_udp(p) > && !(flags & NETDEV_TX_OFFLOAD_UDP_CKSUM)) { > - packet_udp_complete_csum(p); > + packet_udp_complete_csum(p, tnl_inner); > dp_packet_ol_set_l4_csum_good(p); > dp_packet_hwol_reset_tx_l4_csum(p); > } else if (!(flags & NETDEV_TX_OFFLOAD_SCTP_CKSUM) > && dp_packet_hwol_l4_is_sctp(p)) { > - packet_sctp_complete_csum(p); > + packet_sctp_complete_csum(p, tnl_inner); > dp_packet_ol_set_l4_csum_good(p); > dp_packet_hwol_reset_tx_l4_csum(p); > } > diff --git a/lib/dp-packet.h b/lib/dp-packet.h > index 70ddf8aa4..80c7ab961 100644 > --- a/lib/dp-packet.h > +++ b/lib/dp-packet.h > @@ -86,22 +86,47 @@ enum dp_packet_offload_mask { > DEF_OL_FLAG(DP_PACKET_OL_TX_SCTP_CKSUM, RTE_MBUF_F_TX_SCTP_CKSUM, 0x800), > /* Offload IP checksum. */ > DEF_OL_FLAG(DP_PACKET_OL_TX_IP_CKSUM, RTE_MBUF_F_TX_IP_CKSUM, 0x1000), > + /* Offload packet is tunnel GENEVE. 
*/ > + DEF_OL_FLAG(DP_PACKET_OL_TX_TUNNEL_GENEVE, > + RTE_MBUF_F_TX_TUNNEL_GENEVE, 0x2000), > + /* Offload packet is tunnel VXLAN. */ > + DEF_OL_FLAG(DP_PACKET_OL_TX_TUNNEL_VXLAN, > + RTE_MBUF_F_TX_TUNNEL_VXLAN, 0x4000), > + /* Offload tunnel packet, out is ipv4 */ > + DEF_OL_FLAG(DP_PACKET_OL_TX_OUTER_IPV4, > + RTE_MBUF_F_TX_OUTER_IPV4, 0x8000), > + /* Offload TUNNEL out ipv4 checksum */ > + DEF_OL_FLAG(DP_PACKET_OL_TX_OUTER_IP_CKSUM, > + RTE_MBUF_F_TX_OUTER_IP_CKSUM, 0x10000), > + /* Offload TUNNEL out udp checksum */ > + DEF_OL_FLAG(DP_PACKET_OL_TX_OUTER_UDP_CKSUM, > + RTE_MBUF_F_TX_OUTER_UDP_CKSUM, 0x20000), > + /* Offload tunnel packet, out is ipv6 */ > + DEF_OL_FLAG(DP_PACKET_OL_TX_OUTER_IPV6, > + RTE_MBUF_F_TX_OUTER_IPV6, 0x40000), > + > /* Adding new field requires adding to DP_PACKET_OL_SUPPORTED_MASK. */ > }; > > -#define DP_PACKET_OL_SUPPORTED_MASK (DP_PACKET_OL_RSS_HASH | \ > - DP_PACKET_OL_FLOW_MARK | \ > - DP_PACKET_OL_RX_L4_CKSUM_BAD | \ > - DP_PACKET_OL_RX_IP_CKSUM_BAD | \ > - DP_PACKET_OL_RX_L4_CKSUM_GOOD | \ > - DP_PACKET_OL_RX_IP_CKSUM_GOOD | \ > - DP_PACKET_OL_TX_TCP_SEG | \ > - DP_PACKET_OL_TX_IPV4 | \ > - DP_PACKET_OL_TX_IPV6 | \ > - DP_PACKET_OL_TX_TCP_CKSUM | \ > - DP_PACKET_OL_TX_UDP_CKSUM | \ > - DP_PACKET_OL_TX_SCTP_CKSUM | \ > - DP_PACKET_OL_TX_IP_CKSUM) > +#define DP_PACKET_OL_SUPPORTED_MASK (DP_PACKET_OL_RSS_HASH | \ > + DP_PACKET_OL_FLOW_MARK | \ > + DP_PACKET_OL_RX_L4_CKSUM_BAD | \ > + DP_PACKET_OL_RX_IP_CKSUM_BAD | \ > + DP_PACKET_OL_RX_L4_CKSUM_GOOD | \ > + DP_PACKET_OL_RX_IP_CKSUM_GOOD | \ > + DP_PACKET_OL_TX_TCP_SEG | \ > + DP_PACKET_OL_TX_IPV4 | \ > + DP_PACKET_OL_TX_IPV6 | \ > + DP_PACKET_OL_TX_TCP_CKSUM | \ > + DP_PACKET_OL_TX_UDP_CKSUM | \ > + DP_PACKET_OL_TX_SCTP_CKSUM | \ > + DP_PACKET_OL_TX_IP_CKSUM | \ > + DP_PACKET_OL_TX_TUNNEL_GENEVE | \ > + DP_PACKET_OL_TX_TUNNEL_VXLAN | \ > + DP_PACKET_OL_TX_OUTER_IPV4 | \ > + DP_PACKET_OL_TX_OUTER_IP_CKSUM | \ > + DP_PACKET_OL_TX_OUTER_UDP_CKSUM | \ > + DP_PACKET_OL_TX_OUTER_IPV6) > 
> #define DP_PACKET_OL_TX_L4_MASK (DP_PACKET_OL_TX_TCP_CKSUM | \ > DP_PACKET_OL_TX_UDP_CKSUM | \ > @@ -138,6 +163,10 @@ struct dp_packet { > * or UINT16_MAX. */ > uint16_t l4_ofs; /* Transport-level header offset, > or UINT16_MAX. */ > + uint16_t inner_l3_ofs; /* inner Network-level header offset, > + * or UINT16_MAX. */ > + uint16_t inner_l4_ofs; /* inner Transport-level header offset, > + or UINT16_MAX. */ > uint32_t cutlen; /* length in bytes to cut from the end. */ > ovs_be32 packet_type; /* Packet type as defined in OpenFlow */ > uint16_t csum_start; /* Position to start checksumming from. */ > @@ -246,6 +275,9 @@ bool dp_packet_compare_offsets(struct dp_packet *good, > struct dp_packet *test, > struct ds *err_str); > void dp_packet_ol_send_prepare(struct dp_packet *, uint64_t); > +void dp_packet_tnl_outer_ol_send_prepare(struct dp_packet *p, > + uint64_t flags); > + > > > /* Frees memory that 'b' points to, as well as 'b' itself. */ > @@ -478,6 +510,22 @@ dp_packet_l4_size(const struct dp_packet *b) > : 0; > } > > +static inline void * > +dp_packet_inner_l3(const struct dp_packet *b) > +{ > + return b->inner_l3_ofs != UINT16_MAX > + ? (char *) dp_packet_data(b) + b->inner_l3_ofs > + : NULL; > +} > + > +static inline void * > +dp_packet_inner_l4(const struct dp_packet *b) > +{ > + return b->inner_l4_ofs != UINT16_MAX > + ? 
(char *) dp_packet_data(b) + b->inner_l4_ofs > + : NULL; > +} > + > static inline const void * > dp_packet_get_tcp_payload(const struct dp_packet *b) > { > @@ -535,6 +583,25 @@ dp_packet_get_nd_payload(const struct dp_packet *b) > } > > #ifdef DPDK_NETDEV > +static inline void > +dp_packet_set_l2_len(struct dp_packet *b, size_t l2_len) > +{ > + b->mbuf.l2_len = l2_len; > +} > + > +static inline void > +dp_packet_set_l3_len(struct dp_packet *b, size_t l3_len) > +{ > + b->mbuf.l3_len = l3_len; > +} > + > +static inline void > +dp_packet_set_l4_len(struct dp_packet *b, size_t l4_len) > +{ > + b->mbuf.l4_len = l4_len; > +} > + > + > static inline uint64_t * > dp_packet_ol_flags_ptr(const struct dp_packet *b) > { > @@ -554,6 +621,24 @@ dp_packet_flow_mark_ptr(const struct dp_packet *b) > } > > #else > +static inline void > +dp_packet_set_l2_len(struct dp_packet *b OVS_UNUSED, size_t l2_len > OVS_UNUSED) > +{ > + /* There are no implementation */ > +} > + > +static inline void > +dp_packet_set_l3_len(struct dp_packet *b OVS_UNUSED, size_t l3_len > OVS_UNUSED) > +{ > + /* There are no implementation */ > +} > + > +static inline void > +dp_packet_set_l4_len(struct dp_packet *b OVS_UNUSED, size_t l4_len > OVS_UNUSED) > +{ > + /* There are no implementation */ > +} > + > static inline uint32_t * > dp_packet_ol_flags_ptr(const struct dp_packet *b) > { > @@ -615,9 +700,10 @@ dp_packet_set_size(struct dp_packet *b, uint32_t v) > * (and thus 'v') will always be <= UINT16_MAX; this means that there is > no > * loss of accuracy in assigning 'v' to 'data_len'. > */ > - b->mbuf.data_len = (uint16_t)v; /* Current seg length. */ > - b->mbuf.pkt_len = v; /* Total length of all segments linked > to > - * this segment. */ > + /* Current seg length. */ > + b->mbuf.data_len += (uint16_t)(v - b->mbuf.pkt_len); > + /* Total length of all segments linked to this segment. 
*/ > + b->mbuf.pkt_len = v; > } > > static inline uint16_t > @@ -1030,6 +1116,43 @@ dp_packet_hwol_l4_is_sctp(struct dp_packet *b) > DP_PACKET_OL_TX_SCTP_CKSUM; > } > > +/* Returns 'true' if packet 'b' is marked for tunnel GENEVE > + * checksum offloading. */ > +static inline bool > +dp_packet_hwol_is_tunnel_geneve(struct dp_packet *b) > +{ > + return !!(*dp_packet_ol_flags_ptr(b) & DP_PACKET_OL_TX_TUNNEL_GENEVE); > +} > + > +/* Returns 'true' if packet 'b' is marked for tunnel VXLAN > + * checksum offloading. */ > +static inline bool > +dp_packet_hwol_is_tunnel_vxlan(struct dp_packet *b) > +{ > + return !!(*dp_packet_ol_flags_ptr(b) & DP_PACKET_OL_TX_TUNNEL_VXLAN); > +} > + > +/* Returns 'true' if packet 'b' is marked for out ipv4. */ > +static inline bool > +dp_packet_hwol_is_outer_ipv4(struct dp_packet *b) > +{ > + return !!(*dp_packet_ol_flags_ptr(b) & DP_PACKET_OL_TX_OUTER_IPV4); > +} > + > +/* Returns 'true' if packet 'b' is marked for out ipv4 csum offload. */ > +static inline bool > +dp_packet_hwol_is_outer_ipv4_cksum(struct dp_packet *b) > +{ > + return !!(*dp_packet_ol_flags_ptr(b) & DP_PACKET_OL_TX_OUTER_IP_CKSUM); > +} > + > +/* Returns 'true' if packet 'b' is marked for out udp csum offload. */ > +static inline bool > +dp_packet_hwol_is_outer_UDP_cksum(struct dp_packet *b) > +{ > + return !!(*dp_packet_ol_flags_ptr(b) & DP_PACKET_OL_TX_OUTER_UDP_CKSUM); > +} > + > static inline void > dp_packet_hwol_reset_tx_l4_csum(struct dp_packet *p) > { > @@ -1052,6 +1175,14 @@ dp_packet_hwol_set_tx_ipv6(struct dp_packet *a) > *dp_packet_ol_flags_ptr(a) |= DP_PACKET_OL_TX_IPV6; > } > > +/* Mark packet 'a' as IPv6. */ > +static inline void > +dp_packet_hwol_set_tx_outer_ipv6(struct dp_packet *a) > +{ > + *dp_packet_ol_flags_ptr(a) &= ~DP_PACKET_OL_TX_OUTER_IPV4; > + *dp_packet_ol_flags_ptr(a) |= DP_PACKET_OL_TX_OUTER_IPV6; > +} > + > /* Returns 'true' if packet 'p' is marked for IPv4 checksum offloading. 
*/ > static inline bool > dp_packet_hwol_tx_ip_csum(const struct dp_packet *p) > @@ -1105,6 +1236,55 @@ dp_packet_hwol_set_tcp_seg(struct dp_packet *b) > *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_TCP_SEG; > } > > +/* Mark packet 'b' for tunnel geneve offloading. It implies that > + * the packet 'b' is marked for tunnel geneve offloading. */ > +static inline void > +dp_packet_hwol_set_tunnel_geneve(struct dp_packet *b) > +{ > + *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_TUNNEL_GENEVE; > +} > + > +/* Mark packet 'b' for tunnel vxlan offloading. It implies that > + * the packet 'b' is marked for tunnel vxlan offloading. */ > +static inline void > +dp_packet_hwol_set_tunnel_vxlan(struct dp_packet *b) > +{ > + *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_TUNNEL_VXLAN; > +} > + > +/* Mark packet 'b' for out ipv4 packet. */ > +static inline void > +dp_packet_hwol_set_tx_outer_ipv4(struct dp_packet *b) > +{ > + *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_OUTER_IPV4; > +} > + > +/* Mark packet 'b' for out ipv4 csum offloading. */ > +static inline void > +dp_packet_hwol_set_tx_outer_ipv4_csum(struct dp_packet *b) > +{ > + *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_OUTER_IP_CKSUM; > +} > + > +static inline void > +dp_packet_hwol_reset_outer_ipv4_csum(struct dp_packet *p) > +{ > + *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_TX_OUTER_IP_CKSUM; > +} > + > +static inline void > +dp_packet_hwol_reset_outer_udp_csum(struct dp_packet *p) > +{ > + *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_TX_OUTER_UDP_CKSUM; > +} > + > +/* Mark packet 'b' for out udp csum offloading. */ > +static inline void > +dp_packet_hwol_set_outer_udp_csum(struct dp_packet *b) > +{ > + *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_OUTER_UDP_CKSUM; > +} > + > /* Returns 'true' if the IP header has good integrity and the > * checksum in it is complete. 
*/ > static inline bool > @@ -1139,9 +1319,9 @@ dp_packet_ip_checksum_bad(const struct dp_packet *p) > > /* Calculate and set the IPv4 header checksum in packet 'p'. */ > static inline void > -dp_packet_ip_set_header_csum(struct dp_packet *p) > +dp_packet_ip_set_header_csum(struct dp_packet *p, bool inner) > { > - struct ip_header *ip = dp_packet_l3(p); > + struct ip_header *ip = (inner) ? dp_packet_inner_l3(p) : dp_packet_l3(p); > > ovs_assert(ip); > ip->ip_csum = 0; > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c > index 157694bcf..55f0183c7 100644 > --- a/lib/dpif-netdev.c > +++ b/lib/dpif-netdev.c > @@ -7976,7 +7976,9 @@ dp_netdev_upcall(struct dp_netdev_pmd_thread *pmd, > struct dp_packet *packet_, > ds_destroy(&ds); > } > > - dp_packet_ol_send_prepare(packet_, 0); > + if (type != DPIF_UC_MISS) { > + dp_packet_ol_send_prepare(packet_, 0); > + } > > return dp->upcall_cb(packet_, flow, ufid, pmd->core_id, type, userdata, > actions, wc, put_actions, dp->upcall_aux); > diff --git a/lib/flow.c b/lib/flow.c > index fe226cf0f..8f4c38f94 100644 > --- a/lib/flow.c > +++ b/lib/flow.c > @@ -3278,7 +3278,7 @@ packet_expand(struct dp_packet *p, const struct flow > *flow, size_t size) > if (dp_packet_hwol_tx_ip_csum(p)) { > dp_packet_ol_reset_ip_csum_good(p); > } else { > - dp_packet_ip_set_header_csum(p); > + dp_packet_ip_set_header_csum(p, false); > dp_packet_ol_set_ip_csum_good(p); > } > pseudo_hdr_csum = packet_csum_pseudoheader(ip); > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > index 55700250d..80d23da5c 100644 > --- a/lib/netdev-dpdk.c > +++ b/lib/netdev-dpdk.c > @@ -416,6 +416,10 @@ enum dpdk_hw_ol_features { > NETDEV_TX_UDP_CKSUM_OFFLOAD = 1 << 5, > NETDEV_TX_SCTP_CKSUM_OFFLOAD = 1 << 6, > NETDEV_TX_TSO_OFFLOAD = 1 << 7, > + NETDEV_TX_VXLAN_TNL_TSO_OFFLOAD = 1 << 8, > + NETDEV_TX_GENEVE_TNL_TSO_OFFLOAD = 1 << 9, > + NETDEV_TX_OUTER_IP_CKSUM_OFFLOAD = 1 << 10, > + NETDEV_TX_OUTER_UDP_CKSUM_OFFLOAD = 1 << 11, > }; > > enum dpdk_rx_steer_flags { > @@ 
-1075,6 +1079,14 @@ netdev_dpdk_update_netdev_flags(struct netdev_dpdk > *dev) > NETDEV_TX_OFFLOAD_SCTP_CKSUM); > netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_TSO_OFFLOAD, > NETDEV_TX_OFFLOAD_TCP_TSO); > + netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_VXLAN_TNL_TSO_OFFLOAD, > + NETDEV_TX_VXLAN_TNL_TSO); > + netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_GENEVE_TNL_TSO_OFFLOAD, > + NETDEV_TX_GENEVE_TNL_TSO); > + netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_OUTER_IP_CKSUM_OFFLOAD, > + NETDEV_TX_OFFLOAD_OUTER_IP_CKSUM); > + netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_OUTER_UDP_CKSUM_OFFLOAD, > + NETDEV_TX_OFFLOAD_OUTER_UDP_CKSUM); > } > > static int > @@ -1129,6 +1141,23 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int > n_rxq, int n_txq) > conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_TCP_TSO; > } > > + if (dev->hw_ol_features & NETDEV_TX_VXLAN_TNL_TSO_OFFLOAD) { > + conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO; > + } > + > + if (dev->hw_ol_features & NETDEV_TX_GENEVE_TNL_TSO_OFFLOAD) { > + conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_GENEVE_TNL_TSO; > + } > + > + if (dev->hw_ol_features & NETDEV_TX_OUTER_IP_CKSUM_OFFLOAD) { > + conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM; > + } > + > + if (dev->hw_ol_features & NETDEV_TX_OUTER_UDP_CKSUM_OFFLOAD) { > + conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM; > + } > + > + > /* Limit configured rss hash functions to only those supported > * by the eth device. 
*/ > conf.rx_adv_conf.rss_conf.rss_hf &= info.flow_type_rss_offloads; > @@ -1346,6 +1375,18 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev) > dev->hw_ol_features &= ~NETDEV_TX_SCTP_CKSUM_OFFLOAD; > } > > + if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM) { > + dev->hw_ol_features |= NETDEV_TX_OUTER_IP_CKSUM_OFFLOAD; > + } else { > + dev->hw_ol_features &= ~NETDEV_TX_OUTER_IP_CKSUM_OFFLOAD; > + } > + > + if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM) { > + dev->hw_ol_features |= NETDEV_TX_OUTER_UDP_CKSUM_OFFLOAD; > + } else { > + dev->hw_ol_features &= ~NETDEV_TX_OUTER_UDP_CKSUM_OFFLOAD; > + } > + > dev->hw_ol_features &= ~NETDEV_TX_TSO_OFFLOAD; > if (userspace_tso_enabled()) { > if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_TCP_TSO) { > @@ -1354,6 +1395,20 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev) > VLOG_WARN("%s: Tx TSO offload is not supported.", > netdev_get_name(&dev->up)); > } > + > + if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO) { > + dev->hw_ol_features |= NETDEV_TX_VXLAN_TNL_TSO_OFFLOAD; > + } else { > + VLOG_WARN("%s: Tx Vxlan tunnel TSO offload is not supported.", > + netdev_get_name(&dev->up)); > + } > + > + if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_GENEVE_TNL_TSO) { > + dev->hw_ol_features |= NETDEV_TX_GENEVE_TNL_TSO_OFFLOAD; > + } else { > + VLOG_WARN("%s: Tx Geneve tunnel TSO offload is not supported.", > + netdev_get_name(&dev->up)); > + } > } > > n_rxq = MIN(info.max_rx_queues, dev->up.n_rxq); > @@ -2445,11 +2500,23 @@ netdev_dpdk_prep_hwol_packet(struct netdev_dpdk *dev, > struct rte_mbuf *mbuf) > return true; > } > > - mbuf->l2_len = (char *) dp_packet_l3(pkt) - (char *) dp_packet_eth(pkt); > - mbuf->l3_len = (char *) dp_packet_l4(pkt) - (char *) dp_packet_l3(pkt); > - mbuf->l4_len = 0; > - mbuf->outer_l2_len = 0; > - mbuf->outer_l3_len = 0; > + /* If packet is vxlan or geneve tunnel packet, calculate outer > + * l2 len and outer l3 len. Inner l2/l3/l4 len are calculated > + * before. 
*/ > + if (mbuf->ol_flags & > + (RTE_MBUF_F_TX_TUNNEL_GENEVE | RTE_MBUF_F_TX_TUNNEL_VXLAN)) { > + mbuf->outer_l2_len = (char *) dp_packet_l3(pkt) - > + (char *) dp_packet_eth(pkt); > + mbuf->outer_l3_len = (char *) dp_packet_l4(pkt) - > + (char *) dp_packet_l3(pkt); > + } else { > + mbuf->l2_len = (char *) dp_packet_l3(pkt) - > + (char *) dp_packet_eth(pkt); > + mbuf->l3_len = (char *) dp_packet_l4(pkt) - > + (char *) dp_packet_l3(pkt); > + mbuf->outer_l2_len = 0; > + mbuf->outer_l3_len = 0; > + } > > if (mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG) { > struct tcp_header *th = dp_packet_l4(pkt); > @@ -2460,9 +2527,16 @@ netdev_dpdk_prep_hwol_packet(struct netdev_dpdk *dev, > struct rte_mbuf *mbuf) > return false; > } > > - mbuf->l4_len = TCP_OFFSET(th->tcp_ctl) * 4; > + if (mbuf->ol_flags & (RTE_MBUF_F_TX_TUNNEL_GENEVE | > + RTE_MBUF_F_TX_TUNNEL_VXLAN)) { > + mbuf->tso_segsz = dev->mtu - mbuf->l2_len - mbuf->l3_len - > + mbuf->l4_len - mbuf->outer_l3_len; > + } else { > + mbuf->l4_len = TCP_OFFSET(th->tcp_ctl) * 4; > + mbuf->tso_segsz = dev->mtu - mbuf->l3_len - mbuf->l4_len; > + } > + > mbuf->ol_flags |= RTE_MBUF_F_TX_TCP_CKSUM; > - mbuf->tso_segsz = dev->mtu - mbuf->l3_len - mbuf->l4_len; > > if (mbuf->ol_flags & RTE_MBUF_F_TX_IPV4) { > mbuf->ol_flags |= RTE_MBUF_F_TX_IP_CKSUM; > diff --git a/lib/netdev-dummy.c b/lib/netdev-dummy.c > index 1a54add87..ae3b9511d 100644 > --- a/lib/netdev-dummy.c > +++ b/lib/netdev-dummy.c > @@ -1191,7 +1191,7 @@ netdev_dummy_send(struct netdev *netdev, int qid, > > if (dp_packet_hwol_tx_ip_csum(packet) && > !dp_packet_ip_checksum_good(packet)) { > - dp_packet_ip_set_header_csum(packet); > + dp_packet_ip_set_header_csum(packet, false); > dp_packet_ol_set_ip_csum_good(packet); > } > > diff --git a/lib/netdev-native-tnl.c b/lib/netdev-native-tnl.c > index a0682c70f..505c4efdc 100644 > --- a/lib/netdev-native-tnl.c > +++ b/lib/netdev-native-tnl.c > @@ -173,15 +173,29 @@ netdev_tnl_push_ip_header(struct dp_packet *packet, > const void 
*header, > ip6->ip6_plen = htons(*ip_tot_size); > packet_set_ipv6_flow_label(&ip6->ip6_flow, ipv6_label); > packet->l4_ofs = dp_packet_size(packet) - *ip_tot_size; > - dp_packet_hwol_set_tx_ipv6(packet); > + > + if (dp_packet_hwol_is_tunnel_geneve(packet) || > + dp_packet_hwol_is_tunnel_vxlan(packet)) { > + dp_packet_hwol_set_tx_outer_ipv6(packet); > + } else { > + dp_packet_hwol_set_tx_ipv6(packet); > + } > + > dp_packet_ol_reset_ip_csum_good(packet); > return ip6 + 1; > } else { > ip = netdev_tnl_ip_hdr(eth); > ip->ip_tot_len = htons(*ip_tot_size); > /* Postpone checksum to when the packet is pushed to the port. */ > - dp_packet_hwol_set_tx_ipv4(packet); > - dp_packet_hwol_set_tx_ip_csum(packet); > + if (dp_packet_hwol_is_tunnel_geneve(packet) || > + dp_packet_hwol_is_tunnel_vxlan(packet)) { > + dp_packet_hwol_set_tx_outer_ipv4(packet); > + dp_packet_hwol_set_tx_outer_ipv4_csum(packet); > + } else { > + dp_packet_hwol_set_tx_ipv4(packet); > + dp_packet_hwol_set_tx_ip_csum(packet); > + } > + > dp_packet_ol_reset_ip_csum_good(packet); > *ip_tot_size -= IP_HEADER_LEN; > packet->l4_ofs = dp_packet_size(packet) - *ip_tot_size; > @@ -226,14 +240,84 @@ udp_extract_tnl_md(struct dp_packet *packet, struct > flow_tnl *tnl, > return udp + 1; > } > > +/* Calculate inner l2 l3 l4 len as tunnel outer header is not > + * encapsulated now. */ > +static void > +dp_packet_tnl_ol_process(const struct netdev *netdev, > + struct dp_packet *packet, > + const struct ovs_action_push_tnl *data) > +{ > + struct udp_header *udp = NULL; > + uint8_t opt_len = 0; > + struct eth_header *eth = NULL; > + struct ip_header *ip = NULL; > + struct genevehdr *gnh = NULL; > + > + /* l2 l3 l4 len refer to inner len, tunnel outer > + * header is not encapsulated here. 
*/ > + if (dp_packet_hwol_l4_mask(packet)) { > + ip = dp_packet_l3(packet); > + > + if (ip->ip_proto == IPPROTO_TCP) { > + struct tcp_header *th = dp_packet_l4(packet); > + dp_packet_set_l4_len(packet, TCP_OFFSET(th->tcp_ctl) * 4); > + } else if (ip->ip_proto == IPPROTO_UDP) { > + dp_packet_set_l4_len(packet, UDP_HEADER_LEN); > + } else if (ip->ip_proto == IPPROTO_SCTP) { > + dp_packet_set_l4_len(packet, SCTP_HEADER_LEN); > + } > + > + dp_packet_set_l3_len(packet, (char *) dp_packet_l4(packet) - > + (char *) dp_packet_l3(packet)); > + > + if (!strcmp(netdev_get_type(netdev), "geneve") || > + !strcmp(netdev_get_type(netdev), "vxlan")) { > + > + if (IP_VER(ip->ip_ihl_ver) == 4) { > + dp_packet_hwol_set_tx_ipv4(packet); > + dp_packet_hwol_tx_ip_csum(packet); > + } else if (IP_VER(ip->ip_ihl_ver) == 6) { > + dp_packet_hwol_set_tx_ipv6(packet); > + } > + } > + > + /* Attention please, tunnel inner l2 len is consist of udp header > + * len and tunnel header len and inner l2 len. */ > + if (!strcmp(netdev_get_type(netdev), "geneve")) { > + eth = (struct eth_header *)(data->header); > + ip = (struct ip_header *)(eth + 1); > + udp = (struct udp_header *)(ip + 1); > + gnh = (struct genevehdr *)(udp + 1); > + opt_len = gnh->opt_len * 4; > + dp_packet_hwol_set_tunnel_geneve(packet); > + dp_packet_set_l2_len(packet, (char *) dp_packet_l3(packet) - > + (char *) dp_packet_eth(packet) + > + GENEVE_BASE_HLEN + opt_len); > + > + packet->inner_l3_ofs = packet->l3_ofs + GENEVE_BASE_HLEN + > opt_len; > + packet->inner_l4_ofs = packet->l4_ofs + GENEVE_BASE_HLEN + > opt_len; > + > + } else if (!strcmp(netdev_get_type(netdev), "vxlan")) { > + dp_packet_hwol_set_tunnel_vxlan(packet); > + dp_packet_set_l2_len(packet, (char *) dp_packet_l3(packet) - > + (char *) dp_packet_eth(packet) + > + VXLAN_HLEN); > + > + packet->inner_l3_ofs = packet->l3_ofs + VXLAN_HLEN; > + packet->inner_l4_ofs = packet->l4_ofs + VXLAN_HLEN; > + } > + } > +} > + > void > -netdev_tnl_push_udp_header(const struct 
netdev *netdev OVS_UNUSED, > +netdev_tnl_push_udp_header(const struct netdev *netdev, > struct dp_packet *packet, > const struct ovs_action_push_tnl *data) > { > struct udp_header *udp; > int ip_tot_size; > > + dp_packet_tnl_ol_process(netdev, packet, data); > udp = netdev_tnl_push_ip_header(packet, data->header, data->header_len, > &ip_tot_size, 0); > > @@ -241,13 +325,21 @@ netdev_tnl_push_udp_header(const struct netdev *netdev > OVS_UNUSED, > udp->udp_src = netdev_tnl_get_src_port(packet); > udp->udp_len = htons(ip_tot_size); > > - /* Postpone checksum to the egress netdev. */ > - dp_packet_hwol_set_csum_udp(packet);
Hello Dexia, I noted on the previous version that there's an issue with removing this line (the dp_packet_hwol_set_csum_udp() call). Don't we want to set this in all cases? packet_udp_complete_csum already skips the calculation when udp_csum is zero, so we don't have to worry about that. Cheers, Mike > if (udp->udp_csum) { > dp_packet_ol_reset_l4_csum_good(packet); > + if (!strcmp(netdev_get_type(netdev), "geneve") || > + !strcmp(netdev_get_type(netdev), "vxlan")) { > + dp_packet_hwol_set_outer_udp_csum(packet); > + } else { > + dp_packet_hwol_set_csum_udp(packet); > + } > } else { > - dp_packet_ol_set_l4_csum_good(packet); > + dp_packet_ol_set_l4_csum_good(packet); > } > + > + packet->inner_l3_ofs += packet->l4_ofs; > + packet->inner_l4_ofs += packet->l4_ofs; > + > } > > static void * > diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h > index a7393c7ce..22840a058 100644 > --- a/lib/netdev-provider.h > +++ b/lib/netdev-provider.h > @@ -43,6 +43,10 @@ enum netdev_ol_flags { > NETDEV_TX_OFFLOAD_UDP_CKSUM = 1 << 2, > NETDEV_TX_OFFLOAD_SCTP_CKSUM = 1 << 3, > NETDEV_TX_OFFLOAD_TCP_TSO = 1 << 4, > + NETDEV_TX_VXLAN_TNL_TSO = 1 << 5, > + NETDEV_TX_GENEVE_TNL_TSO = 1 << 6, > + NETDEV_TX_OFFLOAD_OUTER_IP_CKSUM = 1 << 7, > + NETDEV_TX_OFFLOAD_OUTER_UDP_CKSUM = 1 << 8, > }; > > /* A network device (e.g. an Ethernet device). > diff --git a/lib/netdev.c b/lib/netdev.c > index e5ac7713d..830712ead 100644 > --- a/lib/netdev.c > +++ b/lib/netdev.c > @@ -806,6 +806,19 @@ netdev_send_prepare_packet(const uint64_t netdev_flags, > return false; > } > > + if (dp_packet_hwol_is_tunnel_vxlan(packet) > + && !(netdev_flags & NETDEV_TX_VXLAN_TNL_TSO)) { > + VLOG_ERR_BUF(errormsg, "No VXLAN TSO support"); > + return false; > + } > + > + if (dp_packet_hwol_is_tunnel_geneve(packet) > + && !(netdev_flags & NETDEV_TX_GENEVE_TNL_TSO)) { > + VLOG_ERR_BUF(errormsg, "No GENEVE TSO support"); > + return false; > + } > + > + > /* Packet with IP csum offloading enabled was received with verified > csum. 
> * Leave the IP csum offloading enabled even with good checksum to the > * netdev to decide what would be the best to do. > @@ -953,17 +966,19 @@ netdev_push_header(const struct netdev *netdev, > size_t i, size = dp_packet_batch_size(batch); > > DP_PACKET_BATCH_REFILL_FOR_EACH (i, size, packet, batch) { > - if (OVS_UNLIKELY(dp_packet_hwol_is_tso(packet))) { > + if (OVS_UNLIKELY(strcmp(netdev_get_type(netdev), "vxlan") && > + strcmp(netdev_get_type(netdev), "geneve") && > + dp_packet_hwol_is_tso(packet))) { > COVERAGE_INC(netdev_push_header_drops); > dp_packet_delete(packet); > - VLOG_WARN_RL(&rl, "%s: Tunneling packets with TSO is " > - "not supported: packet dropped", > - netdev_get_name(netdev)); > + VLOG_WARN_RL(&rl, "%s: Tunneling packets with tso HW offload" > + "flags is not supported: packet dropped", > + netdev_get_name(netdev)); > } else { > - /* The packet is going to be encapsulated and there is > - * no support yet for inner network header csum offloading. */ > - dp_packet_ol_send_prepare(packet, 0); > - > + if (strcmp(netdev_get_type(netdev), "vxlan") && > + strcmp(netdev_get_type(netdev), "geneve")) { > + dp_packet_ol_send_prepare(packet, 0); > + } > netdev->netdev_class->push_header(netdev, packet, data); > > pkt_metadata_init(&packet->md, data->out_port); > @@ -1409,6 +1424,10 @@ netdev_get_status(const struct netdev *netdev, struct > smap *smap) > OL_ADD_STAT("udp_csum", NETDEV_TX_OFFLOAD_UDP_CKSUM); > OL_ADD_STAT("sctp_csum", NETDEV_TX_OFFLOAD_SCTP_CKSUM); > OL_ADD_STAT("tcp_seg", NETDEV_TX_OFFLOAD_TCP_TSO); > + OL_ADD_STAT("vxlan_tso", NETDEV_TX_VXLAN_TNL_TSO); > + OL_ADD_STAT("geneve_tso", NETDEV_TX_GENEVE_TNL_TSO); > + OL_ADD_STAT("out_ip_csum", NETDEV_TX_OFFLOAD_OUTER_IP_CKSUM); > + OL_ADD_STAT("out_udp_csum", NETDEV_TX_OFFLOAD_OUTER_UDP_CKSUM); > #undef OL_ADD_STAT > > err = 0; > diff --git a/lib/packets.c b/lib/packets.c > index 462b51f92..117c8091c 100644 > --- a/lib/packets.c > +++ b/lib/packets.c > @@ -1997,9 +1997,9 @@ 
IP_ECN_set_ce(struct dp_packet *pkt, bool is_ipv6) > /* Set TCP checksum field in packet 'p' with complete checksum. > * The packet must have the L3 and L4 offsets. */ > void > -packet_tcp_complete_csum(struct dp_packet *p) > +packet_tcp_complete_csum(struct dp_packet *p, bool inner) > { > - struct tcp_header *tcp = dp_packet_l4(p); > + struct tcp_header *tcp = (inner) ? dp_packet_inner_l4(p) : > dp_packet_l4(p); > > tcp->tcp_csum = 0; > if (dp_packet_hwol_is_ipv4(p)) { > @@ -2020,9 +2020,9 @@ packet_tcp_complete_csum(struct dp_packet *p) > /* Set UDP checksum field in packet 'p' with complete checksum. > * The packet must have the L3 and L4 offsets. */ > void > -packet_udp_complete_csum(struct dp_packet *p) > +packet_udp_complete_csum(struct dp_packet *p, bool inner) > { > - struct udp_header *udp = dp_packet_l4(p); > + struct udp_header *udp = (inner) ? dp_packet_inner_l4(p) : > dp_packet_l4(p); > > /* Skip csum calculation if the udp_csum is zero. */ > if (!udp->udp_csum) { > @@ -2052,9 +2052,9 @@ packet_udp_complete_csum(struct dp_packet *p) > /* Set SCTP checksum field in packet 'p' with complete checksum. > * The packet must have the L3 and L4 offsets. */ > void > -packet_sctp_complete_csum(struct dp_packet *p) > +packet_sctp_complete_csum(struct dp_packet *p, bool inner) > { > - struct sctp_header *sh = dp_packet_l4(p); > + struct sctp_header *sh = (inner) ? 
dp_packet_inner_l4(p) : > dp_packet_l4(p); > uint16_t tp_len = dp_packet_l4_size(p); > ovs_be32 csum; > > diff --git a/lib/packets.h b/lib/packets.h > index 12245b764..8b6994809 100644 > --- a/lib/packets.h > +++ b/lib/packets.h > @@ -1682,9 +1682,9 @@ uint32_t packet_csum_pseudoheader(const struct > ip_header *); > bool packet_rh_present(struct dp_packet *packet, uint8_t *nexthdr, > bool *first_frag); > void IP_ECN_set_ce(struct dp_packet *pkt, bool is_ipv6); > -void packet_tcp_complete_csum(struct dp_packet *); > -void packet_udp_complete_csum(struct dp_packet *); > -void packet_sctp_complete_csum(struct dp_packet *); > +void packet_tcp_complete_csum(struct dp_packet *, bool is_inner); > +void packet_udp_complete_csum(struct dp_packet *, bool is_inner); > +void packet_sctp_complete_csum(struct dp_packet *, bool is_inner); > > #define DNS_HEADER_LEN 12 > struct dns_header { > diff --git a/tests/dpif-netdev.at b/tests/dpif-netdev.at > index 85119fb81..0f6271760 100644 > --- a/tests/dpif-netdev.at > +++ b/tests/dpif-netdev.at > @@ -658,11 +658,11 @@ OVS_VSWITCHD_START( > other-config:datapath-id=1234 fail-mode=secure]) > > AT_CHECK([ovs-vsctl get interface p1 status | sed -n 's/^{\(.*\).*}$/\1/p'], > [0], [dnl > -tx_ip_csum_offload="false", tx_sctp_csum_offload="false", > tx_tcp_csum_offload="false", tx_tcp_seg_offload="false", > tx_udp_csum_offload="false" > +tx_geneve_tso_offload="false", tx_ip_csum_offload="false", > tx_out_ip_csum_offload="false", tx_out_udp_csum_offload="false", > tx_sctp_csum_offload="false", tx_tcp_csum_offload="false", > tx_tcp_seg_offload="false", tx_udp_csum_offload="false", > tx_vxlan_tso_offload="false" > ], []) > > AT_CHECK([ovs-vsctl get interface br0 status | sed -n > 's/^{\(.*\).*}$/\1/p'], [0], [dnl > -tx_ip_csum_offload="false", tx_sctp_csum_offload="false", > tx_tcp_csum_offload="false", tx_tcp_seg_offload="false", > tx_udp_csum_offload="false" > +tx_geneve_tso_offload="false", tx_ip_csum_offload="false", > 
tx_out_ip_csum_offload="false", tx_out_udp_csum_offload="false", > tx_sctp_csum_offload="false", tx_tcp_csum_offload="false", > tx_tcp_seg_offload="false", tx_udp_csum_offload="false", > tx_vxlan_tso_offload="false" > ], []) > > OVS_VSWITCHD_STOP > -- > 2.33.0.windows.2 > > _______________________________________________ > dev mailing list > d...@openvswitch.org > https://mail.openvswitch.org/mailman/listinfo/ovs-dev > _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev