From: Willem de Bruijn <will...@google.com> When sending large datagrams that are later segmented, store data in page frags to avoid copying from linear in skb_segment.
This logic will also be used by zerocopy. Signed-off-by: Willem de Bruijn <will...@google.com> --- net/ipv4/ip_output.c | 15 +++++++++++---- net/ipv6/ip6_output.c | 19 ++++++++++++++----- 2 files changed, 25 insertions(+), 9 deletions(-) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 7abfb24ec5e5..9ccd6c28e420 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -878,11 +878,13 @@ static int __ip_append_data(struct sock *sk, struct rtable *rt = (struct rtable *)cork->dst; unsigned int wmem_alloc_delta = 0; u32 tskey = 0; + bool paged; skb = skb_peek_tail(queue); exthdrlen = !skb ? rt->dst.header_len : 0; mtu = cork->gso_size ? IP_MAX_MTU : cork->fragsize; + paged = !!cork->gso_size; if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP && sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) @@ -934,6 +936,7 @@ static int __ip_append_data(struct sock *sk, unsigned int fraglen; unsigned int fraggap; unsigned int alloclen; + unsigned int pagedlen = 0; struct sk_buff *skb_prev; alloc_new_skb: skb_prev = skb; @@ -954,8 +957,12 @@ static int __ip_append_data(struct sock *sk, if ((flags & MSG_MORE) && !(rt->dst.dev->features&NETIF_F_SG)) alloclen = mtu; - else + else if (!paged) alloclen = fraglen; + else { + alloclen = min_t(int, fraglen, MAX_HEADER); + pagedlen = fraglen - alloclen; + } alloclen += exthdrlen; @@ -999,7 +1006,7 @@ static int __ip_append_data(struct sock *sk, /* * Find where to start putting bytes. */ - data = skb_put(skb, fraglen + exthdrlen); + data = skb_put(skb, fraglen + exthdrlen - pagedlen); skb_set_network_header(skb, exthdrlen); skb->transport_header = (skb->network_header + fragheaderlen); @@ -1015,7 +1022,7 @@ static int __ip_append_data(struct sock *sk, pskb_trim_unique(skb_prev, maxfraglen); } - copy = datalen - transhdrlen - fraggap; + copy = datalen - transhdrlen - fraggap - pagedlen; if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) { err = -EFAULT; kfree_skb(skb); @@ -1023,7 +1030,7 @@ static int __ip_append_data(struct sock *sk, } offset += copy; - length -= datalen - fraggap; + length -= copy + transhdrlen; transhdrlen = 0; exthdrlen = 0; csummode = CHECKSUM_NONE; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 3ce947c1d173..9fbcec4fb946 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1270,6 +1270,7 @@ static int __ip6_append_data(struct sock *sk, int csummode = CHECKSUM_NONE; unsigned int maxnonfragsize, headersize; unsigned int wmem_alloc_delta = 0; + bool paged; skb = skb_peek_tail(queue); if (!skb) { @@ -1277,6 +1278,7 @@ static int __ip6_append_data(struct sock *sk, dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len; } + paged = !!cork->gso_size; mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize; orig_mtu = mtu; @@ -1368,6 +1370,7 @@ static int __ip6_append_data(struct sock *sk, unsigned int fraglen; unsigned int fraggap; unsigned int alloclen; + unsigned int pagedlen = 0; alloc_new_skb: /* There's no room in the current skb */ if (skb) @@ -1390,11 +1393,17 @@ static int __ip6_append_data(struct sock *sk, if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen) datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len; + fraglen = datalen + fragheaderlen; + if ((flags & MSG_MORE) && !(rt->dst.dev->features&NETIF_F_SG)) alloclen = mtu; - else - alloclen = datalen + fragheaderlen; + else if (!paged) + alloclen = fraglen; + else { + alloclen = min_t(int, fraglen, MAX_HEADER); + pagedlen = fraglen - alloclen; + } alloclen += dst_exthdrlen; @@ -1416,7 +1425,7 @@ static int __ip6_append_data(struct sock *sk, */ alloclen += sizeof(struct frag_hdr); - copy = datalen - transhdrlen - fraggap; + copy = datalen - transhdrlen - fraggap - pagedlen; if (copy < 0) { err = -EINVAL; goto error; @@ -1455,7 +1464,7 @@ static int __ip6_append_data(struct sock *sk, /* * Find where to start putting bytes */ - data = skb_put(skb, fraglen); + data = skb_put(skb, fraglen - pagedlen); skb_set_network_header(skb, exthdrlen); data += fragheaderlen; skb->transport_header = (skb->network_header + @@ -1478,7 +1487,7 @@ static int __ip6_append_data(struct sock *sk, } offset += copy; - length -= datalen - fraggap; + length -= copy + transhdrlen; transhdrlen = 0; exthdrlen = 0; dst_exthdrlen = 0; -- 2.17.0.484.g0c8726318c-goog