Since commit 8a29111c7 ("net: gro: allow to build full sized skb"), GRO may build buffers with a frag_list. This can hurt forwarding because most NICs can't offload such packets; they need to be segmented in software. Split buffers with a frag_list at the frag_list pointer into buffers that can be TSO offloaded.
Signed-off-by: Steffen Klassert <steffen.klass...@secunet.com> --- net/core/skbuff.c | 89 +++++++++++++++++++++++++++++++++++++++++++++++++- net/ipv4/af_inet.c | 7 ++-- net/ipv4/gre_offload.c | 7 +++- net/ipv4/tcp_offload.c | 3 ++ net/ipv4/udp_offload.c | 9 +++-- net/ipv6/ip6_offload.c | 6 +++- 6 files changed, 114 insertions(+), 7 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 3864b4b6..a614e9d 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3078,6 +3078,92 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, sg = !!(features & NETIF_F_SG); csum = !!can_checksum_protocol(features, proto); + headroom = skb_headroom(head_skb); + + if (list_skb && net_gso_ok(features, skb_shinfo(head_skb)->gso_type) && + csum && sg && (mss != GSO_BY_FRAGS) && + !(features & NETIF_F_GSO_PARTIAL)) { + unsigned int lskb_segs; + unsigned int delta_segs, delta_len, delta_truesize; + struct sk_buff *nskb; + delta_segs = delta_len = delta_truesize = 0; + + segs = __alloc_skb(skb_headlen(head_skb) + headroom, + GFP_ATOMIC, skb_alloc_rx_flag(head_skb), + NUMA_NO_NODE); + if (unlikely(!segs)) + return ERR_PTR(-ENOMEM); + + skb_reserve(segs, headroom); + skb_put(segs, skb_headlen(head_skb)); + skb_copy_from_linear_data(head_skb, segs->data, segs->len); + copy_skb_header(segs, head_skb); + + if (skb_shinfo(head_skb)->nr_frags) { + int i; + + if (skb_orphan_frags(head_skb, GFP_ATOMIC)) + goto err; + + for (i = 0; i < skb_shinfo(head_skb)->nr_frags; i++) { + skb_shinfo(segs)->frags[i] = skb_shinfo(head_skb)->frags[i]; + skb_frag_ref(head_skb, i); + } + skb_shinfo(segs)->nr_frags = i; + } + + do { + nskb = skb_clone(list_skb, GFP_ATOMIC); + if (unlikely(!nskb)) + goto err; + + list_skb = list_skb->next; + + if (!tail) + segs->next = nskb; + else + tail->next = nskb; + + tail = nskb; + + if (skb_cow_head(nskb, doffset + headroom)) + goto err; + + lskb_segs = nskb->len / mss; + + skb_shinfo(nskb)->gso_size = mss; + skb_shinfo(nskb)->gso_type = 
skb_shinfo(head_skb)->gso_type; + skb_shinfo(nskb)->gso_segs = lskb_segs; + + + delta_segs += lskb_segs; + delta_len += nskb->len; + delta_truesize += nskb->truesize; + + __skb_push(nskb, doffset); + + skb_release_head_state(nskb); + __copy_skb_header(nskb, head_skb); + + skb_headers_offset_update(nskb, skb_headroom(nskb) - headroom); + skb_reset_mac_len(nskb); + + skb_copy_from_linear_data_offset(head_skb, -tnl_hlen, + nskb->data - tnl_hlen, + doffset + tnl_hlen); + + } while (list_skb); + + skb_shinfo(segs)->gso_segs -= delta_segs; + segs->len = head_skb->len - delta_len; + segs->data_len = head_skb->data_len - delta_len; + segs->truesize += head_skb->data_len - delta_truesize; + + segs->prev = tail; + + goto out; + } + /* GSO partial only requires that we trim off any excess that * doesn't fit into an MSS sized block, so take care of that * now. @@ -3090,7 +3176,6 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, partial_segs = 0; } - headroom = skb_headroom(head_skb); pos = skb_headlen(head_skb); do { @@ -3307,6 +3392,8 @@ perform_csum_check: swap(tail->destructor, head_skb->destructor); swap(tail->sk, head_skb->sk); } + +out: return segs; err: diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 55513e6..c814afa 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1195,7 +1195,7 @@ EXPORT_SYMBOL(inet_sk_rebuild_header); struct sk_buff *inet_gso_segment(struct sk_buff *skb, netdev_features_t features) { - bool udpfrag = false, fixedid = false, encap; + bool udpfrag = false, fixedid = false, gso_partial = false, encap; struct sk_buff *segs = ERR_PTR(-EINVAL); const struct net_offload *ops; unsigned int offset = 0; @@ -1248,6 +1248,9 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb, if (IS_ERR_OR_NULL(segs)) goto out; + if (skb_shinfo(segs)->gso_type & SKB_GSO_PARTIAL) + gso_partial = true; + skb = segs; do { iph = (struct iphdr *)(skb_mac_header(skb) + nhoff); @@ -1257,7 +1260,7 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb, 
iph->frag_off |= htons(IP_MF); offset += skb->len - nhoff - ihl; tot_len = skb->len - nhoff; - } else if (skb_is_gso(skb)) { + } else if (skb_is_gso(skb) && gso_partial) { if (!fixedid) { iph->id = htons(id); id += skb_shinfo(skb)->gso_segs; diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index ecd1e09..cf82e28 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -24,7 +24,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, __be16 protocol = skb->protocol; u16 mac_len = skb->mac_len; int gre_offset, outer_hlen; - bool need_csum, ufo; + bool need_csum, ufo, gso_partial; if (!skb->encapsulation) goto out; @@ -69,6 +69,11 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, goto out; } + if (skb_shinfo(segs)->gso_type & SKB_GSO_PARTIAL) + gso_partial = true; + else + gso_partial = false; + outer_hlen = skb_tnl_header_len(skb); gre_offset = outer_hlen - tnl_hlen; skb = segs; diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index 5c59649..dddd227 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -107,6 +107,9 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, /* Only first segment might have ooo_okay set */ segs->ooo_okay = ooo_okay; + if (skb_is_gso(segs) && !(skb_shinfo(segs)->gso_type & SKB_GSO_PARTIAL)) + mss = (skb_tail_pointer(segs) - skb_transport_header(segs)) + + segs->data_len - thlen; delta = htonl(oldlen + (thlen + mss)); diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 81f253b..dfb6a2c 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -21,7 +21,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, __be16 new_protocol, bool is_ipv6) { int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb); - bool remcsum, need_csum, offload_csum, ufo; + bool remcsum, need_csum, offload_csum, ufo, gso_partial; struct sk_buff *segs = ERR_PTR(-EINVAL); struct udphdr *uh = udp_hdr(skb); u16 mac_offset = skb->mac_header; @@ 
-88,6 +88,11 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, goto out; } + if (skb_shinfo(segs)->gso_type & SKB_GSO_PARTIAL) + gso_partial = true; + else + gso_partial = false; + outer_hlen = skb_tnl_header_len(skb); udp_offset = outer_hlen - tnl_hlen; skb = segs; @@ -117,7 +122,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, * will be using a length value equal to only one MSS sized * segment instead of the entire frame. */ - if (skb_is_gso(skb)) { + if (skb_is_gso(skb) && gso_partial) { uh->len = htons(skb_shinfo(skb)->gso_size + SKB_GSO_CB(skb)->data_offset + skb->head - (unsigned char *)uh); diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 22e90e5..0ec16ba 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -69,6 +69,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, int offset = 0; bool encap, udpfrag; int nhoff; + bool gso_partial = false; skb_reset_network_header(skb); nhoff = skb_network_header(skb) - skb_mac_header(skb); @@ -101,9 +102,12 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, if (IS_ERR(segs)) goto out; + if (skb_shinfo(segs)->gso_type & SKB_GSO_PARTIAL) + gso_partial = true; + for (skb = segs; skb; skb = skb->next) { ipv6h = (struct ipv6hdr *)(skb_mac_header(skb) + nhoff); - if (skb_is_gso(skb)) + if (skb_is_gso(skb) && gso_partial) payload_len = skb_shinfo(skb)->gso_size + SKB_GSO_CB(skb)->data_offset + skb->head - (unsigned char *)(ipv6h + 1); -- 1.9.1