On 10/31/23 20:51, Mike Pattrick wrote: > From: Flavio Leitner <f...@sysclose.org> > > Currently OVS will calculate the segment size based on the > MTU of the egress port. That usually happens to be correct > when the ports share the same MTU, but that is not always true. > > Therefore, if the segment size is provided, then use that and > make sure the over sized packets are dropped. > > Signed-off-by: Flavio Leitner <f...@sysclose.org> > Co-authored-by: Mike Pattrick <m...@redhat.com> > Signed-off-by: Mike Pattrick <m...@redhat.com> > --- > lib/dp-packet.c | 3 ++ > lib/dp-packet.h | 26 ++++++++++++++++ > lib/netdev-dpdk.c | 12 +++++++- > lib/netdev-linux.c | 76 ++++++++++++++++++++++++++++++++-------------- > 4 files changed, 94 insertions(+), 23 deletions(-) > > diff --git a/lib/dp-packet.c b/lib/dp-packet.c > index ed004c3b9..920402369 100644 > --- a/lib/dp-packet.c > +++ b/lib/dp-packet.c > @@ -34,6 +34,7 @@ dp_packet_init__(struct dp_packet *b, size_t allocated, > enum dp_packet_source so > pkt_metadata_init(&b->md, 0); > dp_packet_reset_cutlen(b); > dp_packet_reset_offload(b); > + dp_packet_set_tso_segsz(b, 0); > /* Initialize implementation-specific fields of dp_packet. */ > dp_packet_init_specific(b); > /* By default assume the packet type to be Ethernet. */ > @@ -203,6 +204,8 @@ dp_packet_clone_with_headroom(const struct dp_packet > *buffer, size_t headroom) > *dp_packet_ol_flags_ptr(new_buffer) = *dp_packet_ol_flags_ptr(buffer); > *dp_packet_ol_flags_ptr(new_buffer) &= DP_PACKET_OL_SUPPORTED_MASK; > > + dp_packet_set_tso_segsz(new_buffer, dp_packet_get_tso_segsz(buffer)); > + > if (dp_packet_rss_valid(buffer)) { > dp_packet_set_rss_hash(new_buffer, dp_packet_get_rss_hash(buffer)); > } > diff --git a/lib/dp-packet.h b/lib/dp-packet.h > index 70ddf8aa4..6a924f3ff 100644 > --- a/lib/dp-packet.h > +++ b/lib/dp-packet.h > @@ -126,6 +126,7 @@ struct dp_packet { > uint32_t ol_flags; /* Offloading flags. */ > uint32_t rss_hash; /* Packet hash. */ > uint32_t flow_mark; /* Packet flow mark. */ > + uint16_t tso_segsz; /* TCP TSO segment size. */ > #endif > enum dp_packet_source source; /* Source of memory allocated as 'base'. > */ > > @@ -166,6 +167,9 @@ static inline void dp_packet_set_size(struct dp_packet *, > uint32_t); > static inline uint16_t dp_packet_get_allocated(const struct dp_packet *); > static inline void dp_packet_set_allocated(struct dp_packet *, uint16_t); > > +static inline uint16_t dp_packet_get_tso_segsz(const struct dp_packet *); > +static inline void dp_packet_set_tso_segsz(struct dp_packet *, uint16_t); > + > void *dp_packet_resize_l2(struct dp_packet *, int increment); > void *dp_packet_resize_l2_5(struct dp_packet *, int increment); > static inline void *dp_packet_eth(const struct dp_packet *); > @@ -644,6 +648,17 @@ dp_packet_set_allocated(struct dp_packet *b, uint16_t s) > b->mbuf.buf_len = s; > } > > +static inline uint16_t > +dp_packet_get_tso_segsz(const struct dp_packet *p) > +{ > + return p->mbuf.tso_segsz; > +} > + > +static inline void > +dp_packet_set_tso_segsz(struct dp_packet *p, uint16_t s) > +{ > + p->mbuf.tso_segsz = s; > +} > #else /* DPDK_NETDEV */ > > static inline void > @@ -700,6 +715,17 @@ dp_packet_set_allocated(struct dp_packet *b, uint16_t s) > b->allocated_ = s; > } > > +static inline uint16_t > +dp_packet_get_tso_segsz(const struct dp_packet *p) > +{ > + return p->tso_segsz; > +} > + > +static inline void > +dp_packet_set_tso_segsz(struct dp_packet *p, uint16_t s) > +{ > + p->tso_segsz = s; > +} > #endif /* DPDK_NETDEV */ > > static inline void > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > index 55700250d..9f20cc689 100644 > --- a/lib/netdev-dpdk.c > +++ b/lib/netdev-dpdk.c > @@ -2453,6 +2453,7 @@ netdev_dpdk_prep_hwol_packet(struct netdev_dpdk *dev, > struct rte_mbuf *mbuf) > > if (mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG) { > struct tcp_header *th = dp_packet_l4(pkt); > + int hdr_len; > > if (!th) { > VLOG_WARN_RL(&rl, "%s: TCP Segmentation without L4 header" > @@ -2462,7 +2463,15 @@ netdev_dpdk_prep_hwol_packet(struct netdev_dpdk *dev, > struct rte_mbuf *mbuf) > > mbuf->l4_len = TCP_OFFSET(th->tcp_ctl) * 4; > mbuf->ol_flags |= RTE_MBUF_F_TX_TCP_CKSUM; > + hdr_len = mbuf->l2_len + mbuf->l3_len + mbuf->l4_len; > mbuf->tso_segsz = dev->mtu - mbuf->l3_len - mbuf->l4_len; > + if (OVS_UNLIKELY((hdr_len + mbuf->tso_segsz) > dev->max_packet_len)) > { > + VLOG_WARN_RL(&rl, "%s: Oversized TSO packet. " > + "hdr: %"PRIu32", gso: %"PRIu32", max len: > %"PRIu32"", > + dev->up.name, hdr_len, mbuf->tso_segsz, > + dev->max_packet_len); > + return false; > + } > > if (mbuf->ol_flags & RTE_MBUF_F_TX_IPV4) { > mbuf->ol_flags |= RTE_MBUF_F_TX_IP_CKSUM; > @@ -2737,7 +2746,8 @@ netdev_dpdk_filter_packet_len(struct netdev_dpdk *dev, > struct rte_mbuf **pkts, > int cnt = 0; > struct rte_mbuf *pkt; > > - /* Filter oversized packets, unless are marked for TSO. */ > + /* Filter oversized packets. The TSO packets are filtered out > + * during the offloading preparation for performance reasons. */ > for (i = 0; i < pkt_cnt; i++) { > pkt = pkts[i]; > if (OVS_UNLIKELY((pkt->pkt_len > dev->max_packet_len) > diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c > index a46f5127f..4d91391fb 100644 > --- a/lib/netdev-linux.c > +++ b/lib/netdev-linux.c > @@ -539,7 +539,7 @@ static atomic_count miimon_cnt = ATOMIC_COUNT_INIT(0); > static bool tap_supports_vnet_hdr = true; > > static int netdev_linux_parse_vnet_hdr(struct dp_packet *b); > -static void netdev_linux_prepend_vnet_hdr(struct dp_packet *b, int mtu); > +static int netdev_linux_prepend_vnet_hdr(struct dp_packet *b, int mtu); > static int netdev_linux_do_ethtool(const char *name, struct ethtool_cmd *, > int cmd, const char *cmd_name); > static int get_flags(const struct netdev *, unsigned int *flags); > @@ -1593,9 +1593,10 @@ netdev_linux_rxq_drain(struct netdev_rxq *rxq_) > } > > static int > -netdev_linux_sock_batch_send(int sock, int ifindex, bool tso, int mtu, > - struct dp_packet_batch *batch) > +netdev_linux_sock_batch_send(struct netdev *netdev_, int sock, int ifindex, > + bool tso, int mtu, struct dp_packet_batch > *batch) > { > + struct netdev_linux *netdev = netdev_linux_cast(netdev_); > const size_t size = dp_packet_batch_size(batch); > /* We don't bother setting most fields in sockaddr_ll because the > * kernel ignores them for SOCK_RAW. */ > @@ -1604,26 +1605,35 @@ netdev_linux_sock_batch_send(int sock, int ifindex, > bool tso, int mtu, > > struct mmsghdr *mmsg = xmalloc(sizeof(*mmsg) * size); > struct iovec *iov = xmalloc(sizeof(*iov) * size); > - > struct dp_packet *packet; > + int cnt = 0; > + > DP_PACKET_BATCH_FOR_EACH (i, packet, batch) { > if (tso) { > - netdev_linux_prepend_vnet_hdr(packet, mtu); > - } > + int ret = netdev_linux_prepend_vnet_hdr(packet, mtu); > + > + if (OVS_UNLIKELY(ret)) { > + netdev->tx_dropped += 1; > + VLOG_WARN_RL(&rl, "%s: Packet dropped. %s",
Nit: maybe make this error message the same as for tap? It's missing some context. > + netdev_get_name(netdev_), ovs_strerror(ret)); > + continue; > + } > + } > > - iov[i].iov_base = dp_packet_data(packet); > - iov[i].iov_len = dp_packet_size(packet); > - mmsg[i].msg_hdr = (struct msghdr) { .msg_name = &sll, > - .msg_namelen = sizeof sll, > - .msg_iov = &iov[i], > - .msg_iovlen = 1 }; > + iov[cnt].iov_base = dp_packet_data(packet); > + iov[cnt].iov_len = dp_packet_size(packet); > + mmsg[cnt].msg_hdr = (struct msghdr) { .msg_name = &sll, > + .msg_namelen = sizeof sll, > + .msg_iov = &iov[cnt], > + .msg_iovlen = 1 }; > + cnt++; > } > > int error = 0; > - for (uint32_t ofs = 0; ofs < size; ) { > + for (uint32_t ofs = 0; ofs < cnt;) { > ssize_t retval; > do { > - retval = sendmmsg(sock, mmsg + ofs, size - ofs, 0); > + retval = sendmmsg(sock, mmsg + ofs, cnt - ofs, 0); > error = retval < 0 ? errno : 0; > } while (error == EINTR); > if (error) { > @@ -1665,7 +1675,14 @@ netdev_linux_tap_batch_send(struct netdev *netdev_, > int mtu, > int error; > > if (OVS_LIKELY(tap_supports_vnet_hdr)) { > - netdev_linux_prepend_vnet_hdr(packet, mtu); > + error = netdev_linux_prepend_vnet_hdr(packet, mtu); > + if (OVS_UNLIKELY(error)) { > + netdev->tx_dropped++; > + VLOG_WARN_RL(&rl, "%s: Prepend vnet hdr failed, packet " > + "dropped. %s", netdev_get_name(netdev_), > + ovs_strerror(error)); > + continue; > + } > } > > size = dp_packet_size(packet); > @@ -1795,7 +1812,8 @@ netdev_linux_send(struct netdev *netdev_, int qid > OVS_UNUSED, > goto free_batch; > } > > - error = netdev_linux_sock_batch_send(sock, ifindex, tso, mtu, batch); > + error = netdev_linux_sock_batch_send(netdev_, sock, ifindex, tso, > mtu, > + batch); > } else { > error = netdev_linux_tap_batch_send(netdev_, mtu, batch); > } > @@ -7089,8 +7107,7 @@ netdev_linux_parse_vnet_hdr(struct dp_packet *b) > switch (vnet->gso_type) { > case VIRTIO_NET_HDR_GSO_TCPV4: > case VIRTIO_NET_HDR_GSO_TCPV6: > - /* FIXME: The packet has offloaded TCP segmentation. The gso_size > - * is given and needs to be respected. */ > + dp_packet_set_tso_segsz(b, (OVS_FORCE uint16_t) vnet->gso_size); > dp_packet_hwol_set_tcp_seg(b); > break; > > @@ -7112,18 +7129,32 @@ netdev_linux_parse_vnet_hdr(struct dp_packet *b) > return ret; > } > > -static void > +/* Prepends struct virtio_net_hdr to packet 'b'. > + * Returns 0 if successful, otherwise a positive errno value. > + * Returns EMSGSIZE if the packet 'b' cannot be sent over MTU 'mtu'. */ > +static int > netdev_linux_prepend_vnet_hdr(struct dp_packet *b, int mtu) > { > struct virtio_net_hdr v; > struct virtio_net_hdr *vnet = &v; > > if (dp_packet_hwol_is_tso(b)) { > - uint16_t hdr_len = ((char *)dp_packet_l4(b) - (char > *)dp_packet_eth(b)) > - + TCP_HEADER_LEN; > + uint16_t tso_segsz = dp_packet_get_tso_segsz(b); > + struct tcp_header *tcp = dp_packet_l4(b); > + int tcp_hdr_len = TCP_OFFSET(tcp->tcp_ctl) * 4; > + int hdr_len = ((char *) dp_packet_l4(b) - (char *) dp_packet_eth(b)) > + + tcp_hdr_len; > + int max_packet_len = mtu + ETH_HEADER_LEN + VLAN_HEADER_LEN; > + > + if (OVS_UNLIKELY((hdr_len + tso_segsz) > max_packet_len)) { > + VLOG_WARN_RL(&rl, "Oversized TSO packet. hdr_len: %"PRIu32", " > + "gso: %"PRIu16", max length: %"PRIu32".", hdr_len, > + tso_segsz, max_packet_len); > + return EMSGSIZE; > + } > > vnet->hdr_len = (OVS_FORCE __virtio16)hdr_len; > - vnet->gso_size = (OVS_FORCE __virtio16)(mtu - hdr_len); > + vnet->gso_size = (OVS_FORCE __virtio16)(tso_segsz); > if (dp_packet_hwol_is_ipv4(b)) { > vnet->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; > } else if (dp_packet_hwol_tx_ipv6(b)) { > @@ -7213,4 +7244,5 @@ netdev_linux_prepend_vnet_hdr(struct dp_packet *b, int > mtu) > } > > dp_packet_push(b, vnet, sizeof *vnet); > + return 0; > } _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev