From: Willem de Bruijn <will...@google.com>

When sending large datagrams that are later segmented, store data in
page frags to avoid copying from linear in skb_segment.

This logic will also be used by zerocopy.

Signed-off-by: Willem de Bruijn <will...@google.com>
---
 net/ipv4/ip_output.c  | 15 +++++++++++----
 net/ipv6/ip6_output.c | 19 ++++++++++++++-----
 2 files changed, 25 insertions(+), 9 deletions(-)

diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 7abfb24ec5e5..9ccd6c28e420 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -878,11 +878,13 @@ static int __ip_append_data(struct sock *sk,
        struct rtable *rt = (struct rtable *)cork->dst;
        unsigned int wmem_alloc_delta = 0;
        u32 tskey = 0;
+       bool paged;
 
        skb = skb_peek_tail(queue);
 
        exthdrlen = !skb ? rt->dst.header_len : 0;
        mtu = cork->gso_size ? IP_MAX_MTU : cork->fragsize;
+       paged = !!cork->gso_size;
 
        if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
            sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
@@ -934,6 +936,7 @@ static int __ip_append_data(struct sock *sk,
                        unsigned int fraglen;
                        unsigned int fraggap;
                        unsigned int alloclen;
+                       unsigned int pagedlen = 0;
                        struct sk_buff *skb_prev;
 alloc_new_skb:
                        skb_prev = skb;
@@ -954,8 +957,12 @@ static int __ip_append_data(struct sock *sk,
                        if ((flags & MSG_MORE) &&
                            !(rt->dst.dev->features&NETIF_F_SG))
                                alloclen = mtu;
-                       else
+                       else if (!paged)
                                alloclen = fraglen;
+                       else {
+                               alloclen = min_t(int, fraglen, MAX_HEADER);
+                               pagedlen = fraglen - alloclen;
+                       }
 
                        alloclen += exthdrlen;
 
@@ -999,7 +1006,7 @@ static int __ip_append_data(struct sock *sk,
                        /*
                         *      Find where to start putting bytes.
                         */
-                       data = skb_put(skb, fraglen + exthdrlen);
+                       data = skb_put(skb, fraglen + exthdrlen - pagedlen);
                        skb_set_network_header(skb, exthdrlen);
                        skb->transport_header = (skb->network_header +
                                                 fragheaderlen);
@@ -1015,7 +1022,7 @@ static int __ip_append_data(struct sock *sk,
                                pskb_trim_unique(skb_prev, maxfraglen);
                        }
 
-                       copy = datalen - transhdrlen - fraggap;
+                       copy = datalen - transhdrlen - fraggap - pagedlen;
                        if (copy > 0 && getfrag(from, data + transhdrlen, 
offset, copy, fraggap, skb) < 0) {
                                err = -EFAULT;
                                kfree_skb(skb);
@@ -1023,7 +1030,7 @@ static int __ip_append_data(struct sock *sk,
                        }
 
                        offset += copy;
-                       length -= datalen - fraggap;
+                       length -= copy + transhdrlen;
                        transhdrlen = 0;
                        exthdrlen = 0;
                        csummode = CHECKSUM_NONE;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 3ce947c1d173..9fbcec4fb946 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1270,6 +1270,7 @@ static int __ip6_append_data(struct sock *sk,
        int csummode = CHECKSUM_NONE;
        unsigned int maxnonfragsize, headersize;
        unsigned int wmem_alloc_delta = 0;
+       bool paged;
 
        skb = skb_peek_tail(queue);
        if (!skb) {
@@ -1277,6 +1278,7 @@ static int __ip6_append_data(struct sock *sk,
                dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
        }
 
+       paged = !!cork->gso_size;
        mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
        orig_mtu = mtu;
 
@@ -1368,6 +1370,7 @@ static int __ip6_append_data(struct sock *sk,
                        unsigned int fraglen;
                        unsigned int fraggap;
                        unsigned int alloclen;
+                       unsigned int pagedlen = 0;
 alloc_new_skb:
                        /* There's no room in the current skb */
                        if (skb)
@@ -1390,11 +1393,17 @@ static int __ip6_append_data(struct sock *sk,
 
                        if (datalen > (cork->length <= mtu && !(cork->flags & 
IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
                                datalen = maxfraglen - fragheaderlen - 
rt->dst.trailer_len;
+                       fraglen = datalen + fragheaderlen;
+
                        if ((flags & MSG_MORE) &&
                            !(rt->dst.dev->features&NETIF_F_SG))
                                alloclen = mtu;
-                       else
-                               alloclen = datalen + fragheaderlen;
+                       else if (!paged)
+                               alloclen = fraglen;
+                       else {
+                               alloclen = min_t(int, fraglen, MAX_HEADER);
+                               pagedlen = fraglen - alloclen;
+                       }
 
                        alloclen += dst_exthdrlen;
 
@@ -1416,7 +1425,7 @@ static int __ip6_append_data(struct sock *sk,
                         */
                        alloclen += sizeof(struct frag_hdr);
 
-                       copy = datalen - transhdrlen - fraggap;
+                       copy = datalen - transhdrlen - fraggap - pagedlen;
                        if (copy < 0) {
                                err = -EINVAL;
                                goto error;
@@ -1455,7 +1464,7 @@ static int __ip6_append_data(struct sock *sk,
                        /*
                         *      Find where to start putting bytes
                         */
-                       data = skb_put(skb, fraglen);
+                       data = skb_put(skb, fraglen - pagedlen);
                        skb_set_network_header(skb, exthdrlen);
                        data += fragheaderlen;
                        skb->transport_header = (skb->network_header +
@@ -1478,7 +1487,7 @@ static int __ip6_append_data(struct sock *sk,
                        }
 
                        offset += copy;
-                       length -= datalen - fraggap;
+                       length -= copy + transhdrlen;
                        transhdrlen = 0;
                        exthdrlen = 0;
                        dst_exthdrlen = 0;
-- 
2.17.0.484.g0c8726318c-goog

Reply via email to