On Sun, 2016-05-08 at 09:08 -0700, Eric Dumazet wrote:

> So we probably need to make sure the network header is properly set for
> the segments. Then skb_reset_mac_len(nskb); would work as intended.
> 
> Since skb_segment() is called from the deepest point in GSO path,
> it always sees the inner network header.
> 
> Sounds like the skb_reset_network_header() calls done in inet_gso_segment()
> and ipv6_gso_segment() should only be done for the outer header (when
> SKB_GSO_CB(skb)->encap_level == 0), or even better, only done in
> skb_mac_gso_segment().
> 
> Then we need to use the proper (inner) network header in
> tcp4_gso_segment() and tcp6_gso_segment(), as they currently use
> ip_hdr() and ipv6_hdr()
> 

Prototype patch works for me (but GRE/UDP offloads might need some
work), and would even save a few cycles...

Unfortunately, GSO for GRE/UDP is kind of a mess.

 net/core/dev.c           |    1 +
 net/ipv4/af_inet.c       |    9 +++------
 net/ipv4/tcp_offload.c   |    2 +-
 net/ipv6/ip6_offload.c   |    9 +++------
 net/ipv6/tcpv6_offload.c |    2 +-
 5 files changed, 9 insertions(+), 14 deletions(-)
diff --git a/net/core/dev.c b/net/core/dev.c
index 5c925ac50b95..3a9035ec862b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2658,6 +2658,7 @@ struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
                return ERR_PTR(-EINVAL);
 
        __skb_pull(skb, vlan_depth);
+       skb_reset_network_header(skb);
 
        rcu_read_lock();
        list_for_each_entry_rcu(ptype, &offload_base, list) {
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 9e481992dbae..fef6335a75bc 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1220,12 +1220,12 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
                       0)))
                goto out;
 
-       skb_reset_network_header(skb);
-       nhoff = skb_network_header(skb) - skb_mac_header(skb);
+       skb_reset_inner_network_header(skb);
+       nhoff = skb->data - skb_mac_header(skb);
        if (unlikely(!pskb_may_pull(skb, sizeof(*iph))))
                goto out;
 
-       iph = ip_hdr(skb);
+       iph = inner_ip_hdr(skb);
        ihl = iph->ihl * 4;
        if (ihl < sizeof(*iph))
                goto out;
@@ -1274,9 +1274,6 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
                }
                iph->tot_len = htons(skb->len - nhoff);
                ip_send_check(iph);
-               if (encap)
-                       skb_reset_inner_headers(skb);
-               skb->network_header = (u8 *)iph - skb->head;
        } while ((skb = skb->next));
 
 out:
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index 773083b7f1e9..f0650b50680e 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -36,7 +36,7 @@ static struct sk_buff *tcp4_gso_segment(struct sk_buff *skb,
                return ERR_PTR(-EINVAL);
 
        if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
-               const struct iphdr *iph = ip_hdr(skb);
+               const struct iphdr *iph = inner_ip_hdr(skb);
                struct tcphdr *th = tcp_hdr(skb);
 
                /* Set up checksum pseudo header, usually expect stack to
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index 82e9f3076028..8d27299f86e4 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -84,8 +84,8 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
                       0)))
                goto out;
 
-       skb_reset_network_header(skb);
-       nhoff = skb_network_header(skb) - skb_mac_header(skb);
+       skb_reset_inner_network_header(skb);
+       nhoff = skb->data - skb_mac_header(skb);
        if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
                goto out;
 
@@ -94,7 +94,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
                features &= skb->dev->hw_enc_features;
        SKB_GSO_CB(skb)->encap_level += sizeof(*ipv6h);
 
-       ipv6h = ipv6_hdr(skb);
+       ipv6h = inner_ipv6_hdr(skb);
        __skb_pull(skb, sizeof(*ipv6h));
        segs = ERR_PTR(-EPROTONOSUPPORT);
 
@@ -118,7 +118,6 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
        for (skb = segs; skb; skb = skb->next) {
                ipv6h = (struct ipv6hdr *)(skb_mac_header(skb) + nhoff);
                ipv6h->payload_len = htons(skb->len - nhoff - sizeof(*ipv6h));
-               skb->network_header = (u8 *)ipv6h - skb->head;
 
                if (udpfrag) {
                        unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr);
@@ -129,8 +128,6 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
                        offset += (ntohs(ipv6h->payload_len) -
                                   sizeof(struct frag_hdr));
                }
-               if (encap)
-                       skb_reset_inner_headers(skb);
        }
 
 out:
diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c
index d883c9204c01..8e747a295bce 100644
--- a/net/ipv6/tcpv6_offload.c
+++ b/net/ipv6/tcpv6_offload.c
@@ -50,7 +50,7 @@ static struct sk_buff *tcp6_gso_segment(struct sk_buff *skb,
                return ERR_PTR(-EINVAL);
 
        if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
-               const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+               const struct ipv6hdr *ipv6h = inner_ipv6_hdr(skb);
                struct tcphdr *th = tcp_hdr(skb);
 
                /* Set up pseudo header, usually expect stack to have done


Reply via email to