git sha 20251112170420.3155127-12
Author: David Marchand <[email protected]>
netdev: Use HW segmentation without outer UDP checksum.
This commit introduces hardware segmentation optimization for UDP tunnels
when outer UDP checksum offload is not available. It adds partial
segmentation logic that splits only the last segment while allowing
hardware to handle the remaining segments, improving performance
significantly.
> diff --git a/lib/dp-packet-gso.c b/lib/dp-packet-gso.c
> index 6b8da8b370..d8d3a87500 100644
> --- a/lib/dp-packet-gso.c
> +++ b/lib/dp-packet-gso.c
> @@ -77,6 +77,20 @@ dp_packet_gso_nr_segs(struct dp_packet *p)
> return DIV_ROUND_UP(data_length, segsz);
> }
>
> +int
> +dp_packet_gso_partial_nr_segs(struct dp_packet *p)
> +{
> + if ((dp_packet_tunnel_geneve(p)
> + || dp_packet_tunnel_vxlan(p))
> + && dp_packet_l4_checksum_partial(p)
> + && dp_packet_get_inner_tcp_payload_length(p)
> + != dp_packet_gso_nr_segs(p) * dp_packet_get_tso_segsz(p)) {
> + return 2;
> + }
> +
> + return 1;
> +}
Can this function have a bounds checking issue with the multiplication in
the comparison? When dp_packet_gso_nr_segs() returns a large value and
multiplies with tso_segsz, could this overflow the comparison operand?
> @@ -139,7 +153,7 @@ dp_packet_gso_update_segment(struct dp_packet *seg, int
> seg_no, int n_segs,
> tcp_seq += seg_no * tso_segsz;
> put_16aligned_be32(&tcp_hdr->tcp_seq, htonl(tcp_seq));
>
> - if (OVS_LIKELY(seg_no < (n_segs - 1))) {
> + if (seg_no < (n_segs - 1) && !dp_packet_get_tso_segsz(seg)) {
Why was the OVS_LIKELY hint removed from this condition? Does the new
logic change the probability distribution enough to make this hint
counterproductive?
> @@ -160,12 +174,9 @@ dp_packet_gso_update_segment(struct dp_packet *seg, int
> seg_no, int n_segs,
> }
> }
>
> -/* Perform software segmentation on packet 'p'.
> - *
> - * Segments packet 'p' into the array of preallocated batches in 'batches',
> - * updating the 'batches' pointer as needed. */
> -void
> -dp_packet_gso(struct dp_packet *p, struct dp_packet_batch **batches)
> +static void
> +dp_packet_gso__(struct dp_packet *p, struct dp_packet_batch **batches,
> + bool partial_seg)
Does this function name follow the coding style guidelines? The double
underscore suffix pattern seems inconsistent with typical OVS naming
conventions.
> @@ -199,6 +210,13 @@ dp_packet_gso(struct dp_packet *p, struct
> dp_packet_batch **batches)
> data_len = dp_packet_get_tcp_payload_length(p);
> }
>
> + if (partial_seg) {
> + if (dp_packet_gso_partial_nr_segs(p) != 1) {
> + goto last_seg;
> + }
> + goto first_seg;
> + }
> +
Are these goto statements necessary for correctness? Would restructuring
with if-else blocks improve readability while maintaining the same logic?
> diff --git a/lib/netdev.c b/lib/netdev.c
> index f493dab005..5413ca61d0 100644
> --- a/lib/netdev.c
> +++ b/lib/netdev.c
> @@ -71,6 +71,7 @@ COVERAGE_DEFINE(netdev_add_router);
> COVERAGE_DEFINE(netdev_get_stats);
> COVERAGE_DEFINE(netdev_push_header_drops);
> COVERAGE_DEFINE(netdev_soft_seg_good);
> +COVERAGE_DEFINE(netdev_partial_seg_good);
[ ... ]
> @@ -927,16 +927,16 @@ netdev_send(struct netdev *netdev, int qid, struct
> dp_packet_batch *batch,
> DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
> if (dp_packet_get_tso_segsz(packet)
> && dp_packet_tunnel(packet)) {
> - return netdev_send_tso(netdev, qid, batch,
> concurrent_txq);
> + return netdev_send_tso(netdev, qid, batch,
> concurrent_txq,
> + false);
> }
> }
> } else if (!(netdev_flags & NETDEV_TX_OFFLOAD_OUTER_UDP_CKSUM)) {
> DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
> if (dp_packet_get_tso_segsz(packet)
> - && (dp_packet_tunnel_vxlan(packet)
> - || dp_packet_tunnel_geneve(packet))
> - && dp_packet_l4_checksum_partial(packet)) {
> - return netdev_send_tso(netdev, qid, batch,
> concurrent_txq);
> + && dp_packet_gso_partial_nr_segs(packet) != 1) {
> + return netdev_send_tso(netdev, qid, batch,
> concurrent_txq,
> + true);
> }
> }
> }
Does this change introduce duplicate calls to dp_packet_gso_partial_nr_segs()
for the same packet? The function contains several comparisons and could
be called multiple times in the segmentation path.
> diff --git a/lib/packets.c b/lib/packets.c
> index c05b4abcc8..4e5cac66e4 100644
> --- a/lib/packets.c
> +++ b/lib/packets.c
> @@ -2180,6 +2182,38 @@ packet_udp_tunnel_csum(struct dp_packet *p)
> partial_csum = csum_continue(partial_csum, inner_l4_csum_p + 1,
> (char *) inner_l4_data - (char *)(inner_l4_csum_p + 1));
> udp->udp_csum = csum_finish(partial_csum);
> + tso_segsz = dp_packet_get_tso_segsz(p);
> + if (tso_segsz) {
> + uint16_t payload_len = dp_packet_get_inner_tcp_payload_length(p);
> +
> + ovs_assert(payload_len == tso_segsz * dp_packet_gso_nr_segs(p));
Is this assertion safe against integer overflow in the multiplication?
With large values of tso_segsz and dp_packet_gso_nr_segs(), could the
multiplication exceed uint16_t range before the comparison?
> + /* The pseudo header used in the outer UDP checksum is dependent on
> + * the ip_tot_len / ip6_plen which was a reflection of the TSO frame
> + * size. The segmented packet will be shorter. */
> + udp->udp_csum = recalc_csum16(udp->udp_csum, htons(payload_len),
> + htons(tso_segsz));
[ ... ]
> + /* Summary: we only to care about the inner IPv6 header update.
> + */
Does this comment have a grammar error? Should "we only to care" be
"we only need to care"?
> + if (IP_VER(inner_ip->ip_ihl_ver) != 4) {
> + udp->udp_csum = recalc_csum16(udp->udp_csum, htons(payload_len),
> + htons(tso_segsz));
> + }
Does this code duplicate the checksum recalculation? The same
recalc_csum16() call appears to be made twice with identical parameters
for IPv6 packets.
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev