On 10/31/23 20:51, Mike Pattrick wrote:
> From: Flavio Leitner <f...@sysclose.org>
> 
> Currently OVS will calculate the segment size based on the
> MTU of the egress port. That usually happens to be correct
> when the ports share the same MTU, but that is not always true.
> 
> Therefore, if the segment size is provided, then use that and
> make sure the over sized packets are dropped.
> 
> Signed-off-by: Flavio Leitner <f...@sysclose.org>
> Co-authored-by: Mike Pattrick <m...@redhat.com>
> Signed-off-by: Mike Pattrick <m...@redhat.com>
> ---
>  lib/dp-packet.c    |  3 ++
>  lib/dp-packet.h    | 26 ++++++++++++++++
>  lib/netdev-dpdk.c  | 12 +++++++-
>  lib/netdev-linux.c | 76 ++++++++++++++++++++++++++++++++--------------
>  4 files changed, 94 insertions(+), 23 deletions(-)
> 
> diff --git a/lib/dp-packet.c b/lib/dp-packet.c
> index ed004c3b9..920402369 100644
> --- a/lib/dp-packet.c
> +++ b/lib/dp-packet.c
> @@ -34,6 +34,7 @@ dp_packet_init__(struct dp_packet *b, size_t allocated, 
> enum dp_packet_source so
>      pkt_metadata_init(&b->md, 0);
>      dp_packet_reset_cutlen(b);
>      dp_packet_reset_offload(b);
> +    dp_packet_set_tso_segsz(b, 0);
>      /* Initialize implementation-specific fields of dp_packet. */
>      dp_packet_init_specific(b);
>      /* By default assume the packet type to be Ethernet. */
> @@ -203,6 +204,8 @@ dp_packet_clone_with_headroom(const struct dp_packet 
> *buffer, size_t headroom)
>      *dp_packet_ol_flags_ptr(new_buffer) = *dp_packet_ol_flags_ptr(buffer);
>      *dp_packet_ol_flags_ptr(new_buffer) &= DP_PACKET_OL_SUPPORTED_MASK;
>  
> +    dp_packet_set_tso_segsz(new_buffer, dp_packet_get_tso_segsz(buffer));
> +
>      if (dp_packet_rss_valid(buffer)) {
>          dp_packet_set_rss_hash(new_buffer, dp_packet_get_rss_hash(buffer));
>      }
> diff --git a/lib/dp-packet.h b/lib/dp-packet.h
> index 70ddf8aa4..6a924f3ff 100644
> --- a/lib/dp-packet.h
> +++ b/lib/dp-packet.h
> @@ -126,6 +126,7 @@ struct dp_packet {
>      uint32_t ol_flags;          /* Offloading flags. */
>      uint32_t rss_hash;          /* Packet hash. */
>      uint32_t flow_mark;         /* Packet flow mark. */
> +    uint16_t tso_segsz;         /* TCP TSO segment size. */
>  #endif
>      enum dp_packet_source source;  /* Source of memory allocated as 'base'. 
> */
>  
> @@ -166,6 +167,9 @@ static inline void dp_packet_set_size(struct dp_packet *, 
> uint32_t);
>  static inline uint16_t dp_packet_get_allocated(const struct dp_packet *);
>  static inline void dp_packet_set_allocated(struct dp_packet *, uint16_t);
>  
> +static inline uint16_t dp_packet_get_tso_segsz(const struct dp_packet *);
> +static inline void dp_packet_set_tso_segsz(struct dp_packet *, uint16_t);
> +
>  void *dp_packet_resize_l2(struct dp_packet *, int increment);
>  void *dp_packet_resize_l2_5(struct dp_packet *, int increment);
>  static inline void *dp_packet_eth(const struct dp_packet *);
> @@ -644,6 +648,17 @@ dp_packet_set_allocated(struct dp_packet *b, uint16_t s)
>      b->mbuf.buf_len = s;
>  }
>  
> +static inline uint16_t
> +dp_packet_get_tso_segsz(const struct dp_packet *p)
> +{
> +    return p->mbuf.tso_segsz;
> +}
> +
> +static inline void
> +dp_packet_set_tso_segsz(struct dp_packet *p, uint16_t s)
> +{
> +    p->mbuf.tso_segsz = s;
> +}
>  #else /* DPDK_NETDEV */
>  
>  static inline void
> @@ -700,6 +715,17 @@ dp_packet_set_allocated(struct dp_packet *b, uint16_t s)
>      b->allocated_ = s;
>  }
>  
> +static inline uint16_t
> +dp_packet_get_tso_segsz(const struct dp_packet *p)
> +{
> +    return p->tso_segsz;
> +}
> +
> +static inline void
> +dp_packet_set_tso_segsz(struct dp_packet *p, uint16_t s)
> +{
> +    p->tso_segsz = s;
> +}
>  #endif /* DPDK_NETDEV */
>  
>  static inline void
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index 55700250d..9f20cc689 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -2453,6 +2453,7 @@ netdev_dpdk_prep_hwol_packet(struct netdev_dpdk *dev, 
> struct rte_mbuf *mbuf)
>  
>      if (mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
>          struct tcp_header *th = dp_packet_l4(pkt);
> +        int hdr_len;
>  
>          if (!th) {
>              VLOG_WARN_RL(&rl, "%s: TCP Segmentation without L4 header"
> @@ -2462,7 +2463,15 @@ netdev_dpdk_prep_hwol_packet(struct netdev_dpdk *dev, 
> struct rte_mbuf *mbuf)
>  
>          mbuf->l4_len = TCP_OFFSET(th->tcp_ctl) * 4;
>          mbuf->ol_flags |= RTE_MBUF_F_TX_TCP_CKSUM;
> +        hdr_len = mbuf->l2_len + mbuf->l3_len + mbuf->l4_len;
>          mbuf->tso_segsz = dev->mtu - mbuf->l3_len - mbuf->l4_len;
> +        if (OVS_UNLIKELY((hdr_len + mbuf->tso_segsz) > dev->max_packet_len)) 
> {
> +            VLOG_WARN_RL(&rl, "%s: Oversized TSO packet. "
> +                         "hdr: %"PRIu32", gso: %"PRIu32", max len: 
> %"PRIu32"",
> +                         dev->up.name, hdr_len, mbuf->tso_segsz,
> +                         dev->max_packet_len);
> +            return false;
> +        }
>  
>          if (mbuf->ol_flags & RTE_MBUF_F_TX_IPV4) {
>              mbuf->ol_flags |= RTE_MBUF_F_TX_IP_CKSUM;
> @@ -2737,7 +2746,8 @@ netdev_dpdk_filter_packet_len(struct netdev_dpdk *dev, 
> struct rte_mbuf **pkts,
>      int cnt = 0;
>      struct rte_mbuf *pkt;
>  
> -    /* Filter oversized packets, unless are marked for TSO. */
> +    /* Filter oversized packets. The TSO packets are filtered out
> +     * during the offloading preparation for performance reasons. */
>      for (i = 0; i < pkt_cnt; i++) {
>          pkt = pkts[i];
>          if (OVS_UNLIKELY((pkt->pkt_len > dev->max_packet_len)
> diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
> index a46f5127f..4d91391fb 100644
> --- a/lib/netdev-linux.c
> +++ b/lib/netdev-linux.c
> @@ -539,7 +539,7 @@ static atomic_count miimon_cnt = ATOMIC_COUNT_INIT(0);
>  static bool tap_supports_vnet_hdr = true;
>  
>  static int netdev_linux_parse_vnet_hdr(struct dp_packet *b);
> -static void netdev_linux_prepend_vnet_hdr(struct dp_packet *b, int mtu);
> +static int netdev_linux_prepend_vnet_hdr(struct dp_packet *b, int mtu);
>  static int netdev_linux_do_ethtool(const char *name, struct ethtool_cmd *,
>                                     int cmd, const char *cmd_name);
>  static int get_flags(const struct netdev *, unsigned int *flags);
> @@ -1593,9 +1593,10 @@ netdev_linux_rxq_drain(struct netdev_rxq *rxq_)
>  }
>  
>  static int
> -netdev_linux_sock_batch_send(int sock, int ifindex, bool tso, int mtu,
> -                             struct dp_packet_batch *batch)
> +netdev_linux_sock_batch_send(struct netdev *netdev_, int sock, int ifindex,
> +                             bool tso, int mtu, struct dp_packet_batch 
> *batch)
>  {
> +    struct netdev_linux *netdev = netdev_linux_cast(netdev_);
>      const size_t size = dp_packet_batch_size(batch);
>      /* We don't bother setting most fields in sockaddr_ll because the
>       * kernel ignores them for SOCK_RAW. */
> @@ -1604,26 +1605,35 @@ netdev_linux_sock_batch_send(int sock, int ifindex, 
> bool tso, int mtu,
>  
>      struct mmsghdr *mmsg = xmalloc(sizeof(*mmsg) * size);
>      struct iovec *iov = xmalloc(sizeof(*iov) * size);
> -
>      struct dp_packet *packet;
> +    int cnt = 0;
> +
>      DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
>          if (tso) {
> -            netdev_linux_prepend_vnet_hdr(packet, mtu);
> -        }
> +            int ret = netdev_linux_prepend_vnet_hdr(packet, mtu);
> +
> +            if (OVS_UNLIKELY(ret)) {
> +                netdev->tx_dropped += 1;
> +                VLOG_WARN_RL(&rl, "%s: Packet dropped. %s",

Nit: maybe make this error message the same as for tap?
It's missing some context.

> +                             netdev_get_name(netdev_), ovs_strerror(ret));
> +                continue;
> +            }
> +         }
>  
> -        iov[i].iov_base = dp_packet_data(packet);
> -        iov[i].iov_len = dp_packet_size(packet);
> -        mmsg[i].msg_hdr = (struct msghdr) { .msg_name = &sll,
> -                                            .msg_namelen = sizeof sll,
> -                                            .msg_iov = &iov[i],
> -                                            .msg_iovlen = 1 };
> +        iov[cnt].iov_base = dp_packet_data(packet);
> +        iov[cnt].iov_len = dp_packet_size(packet);
> +        mmsg[cnt].msg_hdr = (struct msghdr) { .msg_name = &sll,
> +                                              .msg_namelen = sizeof sll,
> +                                              .msg_iov = &iov[cnt],
> +                                              .msg_iovlen = 1 };
> +        cnt++;
>      }
>  
>      int error = 0;
> -    for (uint32_t ofs = 0; ofs < size; ) {
> +    for (uint32_t ofs = 0; ofs < cnt;) {
>          ssize_t retval;
>          do {
> -            retval = sendmmsg(sock, mmsg + ofs, size - ofs, 0);
> +            retval = sendmmsg(sock, mmsg + ofs, cnt - ofs, 0);
>              error = retval < 0 ? errno : 0;
>          } while (error == EINTR);
>          if (error) {
> @@ -1665,7 +1675,14 @@ netdev_linux_tap_batch_send(struct netdev *netdev_, 
> int mtu,
>          int error;
>  
>          if (OVS_LIKELY(tap_supports_vnet_hdr)) {
> -            netdev_linux_prepend_vnet_hdr(packet, mtu);
> +            error = netdev_linux_prepend_vnet_hdr(packet, mtu);
> +            if (OVS_UNLIKELY(error)) {
> +                netdev->tx_dropped++;
> +                VLOG_WARN_RL(&rl, "%s: Prepend vnet hdr failed, packet "
> +                             "dropped. %s", netdev_get_name(netdev_),
> +                             ovs_strerror(error));
> +                continue;
> +            }
>          }
>  
>          size = dp_packet_size(packet);
> @@ -1795,7 +1812,8 @@ netdev_linux_send(struct netdev *netdev_, int qid 
> OVS_UNUSED,
>              goto free_batch;
>          }
>  
> -        error = netdev_linux_sock_batch_send(sock, ifindex, tso, mtu, batch);
> +        error = netdev_linux_sock_batch_send(netdev_, sock, ifindex, tso, 
> mtu,
> +                                             batch);
>      } else {
>          error = netdev_linux_tap_batch_send(netdev_, mtu, batch);
>      }
> @@ -7089,8 +7107,7 @@ netdev_linux_parse_vnet_hdr(struct dp_packet *b)
>      switch (vnet->gso_type) {
>      case VIRTIO_NET_HDR_GSO_TCPV4:
>      case VIRTIO_NET_HDR_GSO_TCPV6:
> -        /* FIXME: The packet has offloaded TCP segmentation. The gso_size
> -         * is given and needs to be respected. */
> +        dp_packet_set_tso_segsz(b, (OVS_FORCE uint16_t) vnet->gso_size);
>          dp_packet_hwol_set_tcp_seg(b);
>          break;
>  
> @@ -7112,18 +7129,32 @@ netdev_linux_parse_vnet_hdr(struct dp_packet *b)
>      return ret;
>  }
>  
> -static void
> +/* Prepends struct virtio_net_hdr to packet 'b'.
> + * Returns 0 if successful, otherwise a positive errno value.
> + * Returns EMSGSIZE if the packet 'b' cannot be sent over MTU 'mtu'. */
> +static int
>  netdev_linux_prepend_vnet_hdr(struct dp_packet *b, int mtu)
>  {
>      struct virtio_net_hdr v;
>      struct virtio_net_hdr *vnet = &v;
>  
>      if (dp_packet_hwol_is_tso(b)) {
> -        uint16_t hdr_len = ((char *)dp_packet_l4(b) - (char 
> *)dp_packet_eth(b))
> -                            + TCP_HEADER_LEN;
> +        uint16_t tso_segsz = dp_packet_get_tso_segsz(b);
> +        struct tcp_header *tcp = dp_packet_l4(b);
> +        int tcp_hdr_len = TCP_OFFSET(tcp->tcp_ctl) * 4;
> +        int hdr_len = ((char *) dp_packet_l4(b) - (char *) dp_packet_eth(b))
> +                      + tcp_hdr_len;
> +        int max_packet_len = mtu + ETH_HEADER_LEN + VLAN_HEADER_LEN;
> +
> +        if (OVS_UNLIKELY((hdr_len + tso_segsz) > max_packet_len)) {
> +            VLOG_WARN_RL(&rl, "Oversized TSO packet. hdr_len: %"PRIu32", "
> +                         "gso: %"PRIu16", max length: %"PRIu32".", hdr_len,
> +                         tso_segsz, max_packet_len);
> +            return EMSGSIZE;
> +        }
>  
>          vnet->hdr_len = (OVS_FORCE __virtio16)hdr_len;
> -        vnet->gso_size = (OVS_FORCE __virtio16)(mtu - hdr_len);
> +        vnet->gso_size = (OVS_FORCE __virtio16)(tso_segsz);
>          if (dp_packet_hwol_is_ipv4(b)) {
>              vnet->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
>          } else if (dp_packet_hwol_tx_ipv6(b)) {
> @@ -7213,4 +7244,5 @@ netdev_linux_prepend_vnet_hdr(struct dp_packet *b, int 
> mtu)
>      }
>  
>      dp_packet_push(b, vnet, sizeof *vnet);
> +    return 0;
>  }

_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to