Re: [ovs-dev] 答复: [PATCH V3 2/4] Add GSO support for DPDK data path

2021-02-07 Thread William Tu
On Tue, Oct 27, 2020 at 6:02 PM Yi Yang (杨燚)-云服务集团  wrote:
>
> -邮件原件-
> 发件人: dev [mailto:ovs-dev-boun...@openvswitch.org] 代表 Ilya Maximets
> 发送时间: 2020年10月27日 21:03
> 收件人: yang_y...@163.com; ovs-dev@openvswitch.org
> 抄送: f...@sysclose.org; i.maxim...@ovn.org
> 主题: Re: [ovs-dev] [PATCH V3 2/4] Add GSO support for DPDK data path
>
> On 8/7/20 12:56 PM, yang_y...@163.com wrote:
> > From: Yi Yang 
> >
> > GSO(Generic Segment Offload) can segment large UDP  and TCP packet to
> > small packets per MTU of destination , especially for the case that
> > physical NIC can't do hardware offload VXLAN TSO and VXLAN UFO, GSO
> > can make sure userspace TSO can still work but not drop.
> >
> > In addition, GSO can help improve UDP performane when UFO is enabled
> > in VM.
> >
> > GSO can support TCP, UDP, VXLAN TCP, VXLAN UDP, it is done in Tx
> > function of physical NIC.
> >
> > Signed-off-by: Yi Yang 
> > ---
> >  lib/dp-packet.h|  21 +++-
> >  lib/netdev-dpdk.c  | 358
> > +
> >  lib/netdev-linux.c |  17 ++-
> >  lib/netdev.c   |  67 +++---
> >  4 files changed, 417 insertions(+), 46 deletions(-)

snip

> >
> > @@ -2339,24 +2428,19 @@ netdev_dpdk_prep_hwol_batch(struct netdev_dpdk 
> > *dev, struct rte_mbuf **pkts,
> >  return cnt;
> >  }
> >
> > -/* Tries to transmit 'pkts' to txq 'qid' of device 'dev'.  Takes
> > ownership of
> > - * 'pkts', even in case of failure.
> > - *
> > - * Returns the number of packets that weren't transmitted. */  static
> > inline int -netdev_dpdk_eth_tx_burst(struct netdev_dpdk *dev, int qid,
> > - struct rte_mbuf **pkts, int cnt)
> > +__netdev_dpdk_eth_tx_burst(struct netdev_dpdk *dev, int qid,
> > +   struct rte_mbuf **pkts, int cnt)
> >  {
> >  uint32_t nb_tx = 0;
> > -uint16_t nb_tx_prep = cnt;
> > +uint32_t nb_tx_prep;
> >
> > -if (userspace_tso_enabled()) {
> > -nb_tx_prep = rte_eth_tx_prepare(dev->port_id, qid, pkts, cnt);
> > -if (nb_tx_prep != cnt) {
> > -VLOG_WARN_RL(, "%s: Output batch contains invalid packets. "
> > - "Only %u/%u are valid: %s", dev->up.name, 
> > nb_tx_prep,
> > - cnt, rte_strerror(rte_errno));
> > -}
> > +nb_tx_prep = rte_eth_tx_prepare(dev->port_id, qid, pkts, cnt);
> > +if (nb_tx_prep != cnt) {
> > +VLOG_WARN_RL(, "%s: Output batch contains invalid packets. "
> > +  "Only %u/%u are valid: %s",
> > + dev->up.name, nb_tx_prep,
> > + cnt, rte_strerror(rte_errno));
> >  }
> >
> >  while (nb_tx != nb_tx_prep) {
> > @@ -2384,6 +2468,200 @@ netdev_dpdk_eth_tx_burst(struct netdev_dpdk *dev, 
> > int qid,
> >  return cnt - nb_tx;
> >  }
> >
> > +static inline void
> > +set_multiseg_udptcp_cksum(struct rte_mbuf *mbuf)
>
> I didn't review the patch, only had a quick glance, but this part bothers me. 
>  OVS doesn't support multi-segment mbufs, so it should not be possible for 
> such mbufs being transmitted by OVS.  So, I do not understand why this 
> function needs to work with such mbufs.
>
> [Yi Yang] Only DPDK driver/Tx function will use it, not OVS, 
> set_multiseg_udptcp_cksum is called in GSO part, it is last step before Tx 
> function, it is a big external mbuf before rte_gso_segment, that isn't a 
> multi-segmented mbuf.
>

Hi Ilya,

Now I understand Yi Yang's point better and I agree with him.
Looks like the patch does the GSO at the DPDK TX function.
It creates multi-seg mbuf after rte_gso_segment(), but will immediately
send out the multi-seg mbuf to DPDK port, without traversing inside other
part of OVS code. I guess this case it should work OK?

William
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] 答复: [PATCH V3 2/4] Add GSO support for DPDK data path

2020-10-27 Thread 杨燚
-邮件原件-
发件人: dev [mailto:ovs-dev-boun...@openvswitch.org] 代表 Ilya Maximets
发送时间: 2020年10月27日 21:03
收件人: yang_y...@163.com; ovs-dev@openvswitch.org
抄送: f...@sysclose.org; i.maxim...@ovn.org
主题: Re: [ovs-dev] [PATCH V3 2/4] Add GSO support for DPDK data path

On 8/7/20 12:56 PM, yang_y...@163.com wrote:
> From: Yi Yang 
> 
> GSO(Generic Segment Offload) can segment large UDP  and TCP packet to 
> small packets per MTU of destination , especially for the case that 
> physical NIC can't do hardware offload VXLAN TSO and VXLAN UFO, GSO 
> can make sure userspace TSO can still work but not drop.
> 
> In addition, GSO can help improve UDP performane when UFO is enabled 
> in VM.
> 
> GSO can support TCP, UDP, VXLAN TCP, VXLAN UDP, it is done in Tx 
> function of physical NIC.
> 
> Signed-off-by: Yi Yang 
> ---
>  lib/dp-packet.h|  21 +++-
>  lib/netdev-dpdk.c  | 358 
> +
>  lib/netdev-linux.c |  17 ++-
>  lib/netdev.c   |  67 +++---
>  4 files changed, 417 insertions(+), 46 deletions(-)
> 
> diff --git a/lib/dp-packet.h b/lib/dp-packet.h index 79895f2..c33868d 
> 100644
> --- a/lib/dp-packet.h
> +++ b/lib/dp-packet.h
> @@ -83,6 +83,8 @@ enum dp_packet_offload_mask {
>  DEF_OL_FLAG(DP_PACKET_OL_TX_SCTP_CKSUM, PKT_TX_SCTP_CKSUM, 0x800),
>  /* VXLAN TCP Segmentation Offload. */
>  DEF_OL_FLAG(DP_PACKET_OL_TX_TUNNEL_VXLAN, PKT_TX_TUNNEL_VXLAN, 
> 0x1000),
> +/* UDP Segmentation Offload. */
> +DEF_OL_FLAG(DP_PACKET_OL_TX_UDP_SEG, PKT_TX_UDP_SEG, 0x2000),
>  /* Adding new field requires adding to 
> DP_PACKET_OL_SUPPORTED_MASK. */  };
>  
> @@ -97,7 +99,8 @@ enum dp_packet_offload_mask {
>   DP_PACKET_OL_TX_IPV6  | \
>   DP_PACKET_OL_TX_TCP_CKSUM | \
>   DP_PACKET_OL_TX_UDP_CKSUM | \
> - DP_PACKET_OL_TX_SCTP_CKSUM)
> + DP_PACKET_OL_TX_SCTP_CKSUM| \
> + DP_PACKET_OL_TX_UDP_SEG)
>  
>  #define DP_PACKET_OL_TX_L4_MASK (DP_PACKET_OL_TX_TCP_CKSUM | \
>   DP_PACKET_OL_TX_UDP_CKSUM | \ @@ 
> -956,6 +959,13 @@ dp_packet_hwol_is_tso(const struct dp_packet *b)
>  return !!(*dp_packet_ol_flags_ptr(b) & DP_PACKET_OL_TX_TCP_SEG);  
> }
>  
> +/* Returns 'true' if packet 'b' is marked for UDP segmentation 
> +offloading. */ static inline bool dp_packet_hwol_is_uso(const struct 
> +dp_packet *b) {
> +return !!(*dp_packet_ol_flags_ptr(b) & DP_PACKET_OL_TX_UDP_SEG); 
> +}
> +
>  /* Returns 'true' if packet 'b' is marked for IPv4 checksum 
> offloading. */  static inline bool  dp_packet_hwol_is_ipv4(const 
> struct dp_packet *b) @@ -1034,6 +1044,15 @@ 
> dp_packet_hwol_set_tcp_seg(struct dp_packet *b)
>  *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_TCP_SEG;  }
>  
> +/* Mark packet 'b' for UDP segmentation offloading.  It implies that
> + * either the packet 'b' is marked for IPv4 or IPv6 checksum 
> +offloading
> + * and also for UDP checksum offloading. */ static inline void 
> +dp_packet_hwol_set_udp_seg(struct dp_packet *b) {
> +*dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_UDP_SEG; }
> +
>  #ifdef DPDK_NETDEV
>  /* Mark packet 'b' for VXLAN TCP segmentation offloading. */  static 
> inline void diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 
> 30493ed..888a45e 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -38,13 +38,15 @@
>  #include 
>  #include 
>  #include 
> +#include 
> +#include 
>  #include 
>  #include 
>  #include 
>  #include 
>  #include 
>  #include 
> -#include 
> +#include 
>  
>  #include "cmap.h"
>  #include "coverage.h"
> @@ -162,6 +164,7 @@ typedef uint16_t dpdk_port_t;
> | DEV_TX_OFFLOAD_UDP_CKSUM\
> | DEV_TX_OFFLOAD_IPV4_CKSUM)
>  
> +#define MAX_GSO_MBUFS 64
>  
>  static const struct rte_eth_conf port_conf = {
>  .rxmode = {
> @@ -2171,6 +2174,16 @@ is_local_to_local(uint16_t src_port_id, struct 
> netdev_dpdk *dev)
>  return ret;
>  }
>  
> +static uint16_t
> +get_udptcp_checksum(void *l3_hdr, void *l4_hdr, uint16_t ethertype) {
> +if (ethertype == htons(RTE_ETHER_TYPE_IPV4)) {
> +return rte_ipv4_udptcp_cksum(l3_hdr, l4_hdr);
> +} else { /* assume ethertype == RTE_ETHER_TYPE_IPV6 */
> +return rte_ipv6_udptcp_cksum(l3_hdr, l4_hdr);
> +}
> +}
> +
>  /* Prepare the packet for HWOL.
>   * Return True if the packet is OK to continue. */  static bool @@ 
> -2203,10 +2216,9 @@ netdev_dpdk_prep_hwol_packet(struct netdev_dpdk *dev, 
> struct rte_mbuf *mbuf)
>   * also can't be handled. So PKT_TX_TUNNEL_VXLAN must be cleared
>   * outer_l2_len and outer_l3_len must be zeroed.
>   */
> -if (!(dev->up.ol_flags & NETDEV_TX_OFFLOAD_VXLAN_TSO)
> -