Hello, Sorry, not a full review but one part bothers me for TSO.
On Mon, 24 Nov 2025 at 21:23, Mike Pattrick via dev <[email protected]> wrote: > @@ -9033,13 +9037,138 @@ pmd_send_port_cache_lookup(const struct > dp_netdev_pmd_thread *pmd, > return tx_port_lookup(&pmd->send_port_cache, port_no); > } > > +/* Return NULL on no ICMP reply needed. */ > +static struct dp_packet * > +netdev_generate_frag_needed(struct dp_packet *packet, int send_size, int mtu) > +{ > + const struct eth_header *eth; > + const void *l3; > + size_t l3_len; > + bool is_ipv6; > + > + if (send_size < mtu) { > + return NULL; > + } > + > + eth = dp_packet_eth(packet); > + if (!eth) { > + return NULL; > + } > + > + if (eth->eth_type == htons(ETH_TYPE_IP)) { > + is_ipv6 = false; > + } else if (eth->eth_type == htons(ETH_TYPE_IPV6)) { > + is_ipv6 = true; > + } else { > + return NULL; > + } > + > + l3 = dp_packet_l3(packet); > + l3_len = dp_packet_l3_size(packet); > + > + if (is_ipv6) { > + const struct ovs_16aligned_ip6_hdr *ip6; > + struct in6_addr ip6_src; > + struct in6_addr ip6_dst; > + size_t max_payload; > + const void *l4_buf; > + uint8_t nw_proto; > + uint8_t nw_frag; > + size_t l4_len; > + > + ip6 = (const struct ovs_16aligned_ip6_hdr *) l3; > + > + if (mtu < 1280) { > + return NULL; > + } I don't have all the context in mind. What does 1280 stand for? > + > + if (ipv6_addr_is_multicast(&ip6->ip6_src) || > + ipv6_addr_is_any(&ip6->ip6_src) || > + ipv6_addr_is_loopback(&ip6->ip6_src)) { > + return NULL; > + } > + > + nw_proto = ip6->ip6_nxt; > + l4_buf = ip6 + 1; > + l4_len = l3_len - sizeof *ip6; > + if (!parse_ipv6_ext_hdrs(&l4_buf, &l4_len, &nw_proto, &nw_frag, > + NULL, NULL)) { > + return NULL; > + } > + > + if (nw_frag == FLOW_NW_FRAG_LATER) { > + return NULL; > + } > + > + if (nw_proto == IPPROTO_ICMPV6) { > + const struct icmp6_header *icmp = l4_buf; > + if (icmp && packet_icmpv6_is_err(icmp->icmp6_type)) { > + return NULL; > + } > + } > + > + max_payload = 1280 - ETH_HEADER_LEN - IPV6_HEADER_LEN - > + ICMP6_DATA_HEADER_LEN; > + l3_len = l3_len < max_payload ? l3_len : max_payload; > + > + memcpy(&ip6_src, &ip6->ip6_dst, sizeof(ip6_dst)); > + memcpy(&ip6_dst, &ip6->ip6_src, sizeof(ip6_dst)); > + return compose_ipv6_ptb(eth->eth_dst, eth->eth_src, > + &ip6_dst, &ip6_src, > + htonl(mtu), l3, l3_len); > + } else { > + const struct ip_header *ip; > + size_t icmp_payload_len; > + size_t available; > + > + ip = (const struct ip_header *) l3; > + > + if (mtu < 576) { And here, why 576? > + return NULL; > + } > + > + if (!(ip->ip_frag_off & htons(IP_DF))) { > + return NULL; > + } > + > + if (ip_is_multicast(get_16aligned_be32(&ip->ip_src)) || > + ip_is_broadcast(get_16aligned_be32(&ip->ip_src)) || > + ip_is_loopback(get_16aligned_be32(&ip->ip_src))) { > + return NULL; > + } > + > + if (ip->ip_proto == IPPROTO_ICMP) { > + const struct icmp_header *icmp = dp_packet_l4(packet); > + if (icmp && packet_icmp_is_err(icmp->icmp_type)) { > + return NULL; > + } > + } > + icmp_payload_len = IP_IHL(ip->ip_ihl_ver) * 4 + > ICMP_ERROR_DATA_L4_LEN; > + > + available = l3_len; > + if (icmp_payload_len > available) { > + icmp_payload_len = available; > + } > + > + return compose_ipv4_fn(eth->eth_dst, eth->eth_src, > + get_16aligned_be32(&ip->ip_dst), > + get_16aligned_be32(&ip->ip_src), > + htons(mtu), l3, icmp_payload_len); > + } > +} > + > static int > -push_tnl_action(const struct dp_netdev_pmd_thread *pmd, > +push_tnl_action(struct dp_netdev_pmd_thread *pmd, > const struct nlattr *attr, > struct dp_packet_batch *batch) > { > - struct tx_port *tun_port; > + size_t i, size = dp_packet_batch_size(batch); > const struct ovs_action_push_tnl *data; > + uint32_t *depth = recirc_depth_get(); > + struct dp_packet *packet; > + struct tx_port *tun_port; > + struct netdev *netdev; > + int mtu; > int err; > > data = nl_attr_get(attr); > @@ -9049,7 +9178,45 @@ push_tnl_action(const struct dp_netdev_pmd_thread *pmd, > err = -EINVAL; > goto error; > } > - err = netdev_push_header(tun_port->port->netdev, batch, data); > + > + netdev = tun_port->port->netdev; > + if (netdev->mtu_user_config && > + netdev_get_mtu(netdev, &mtu) == 0) { > + struct dp_packet_batch icmp_batch; > + > + dp_packet_batch_init(&icmp_batch); > + DP_PACKET_BATCH_REFILL_FOR_EACH (i, size, packet, batch) { > + int len = dp_packet_get_send_len(packet) + data->header_len; I suspect this will not work with TSO packets: dp_packet_get_send_len() returns a length before segmentation and it will not be the actual length of sent data. > + > + struct dp_packet *icmp; > + > + icmp = netdev_generate_frag_needed(packet, len, mtu); > + if (!icmp) { > + dp_packet_batch_refill(batch, packet, i); > + continue; > + } > + > + dp_packet_delete(packet); > + COVERAGE_INC(datapath_drop_tunnel_mtu_drop); > + > + pkt_metadata_init(&icmp->md, data->tnl_port); > + > + dp_packet_batch_add(&icmp_batch, icmp); > + } > + > + if (*depth >= MAX_RECIRC_DEPTH) { > + COVERAGE_ADD(datapath_drop_recirc_error, > + dp_packet_batch_size(&icmp_batch)); > + dp_packet_delete_batch(&icmp_batch, true); > + } > + > + if (dp_packet_batch_size(&icmp_batch) > 0) { > + (*depth)++; > + dp_netdev_recirculate(pmd, &icmp_batch); > + (*depth)--; > + } > + } > + err = netdev_push_header(netdev, batch, data); > if (!err) { > return 0; > } -- David Marchand _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
