From: Willem de Bruijn <will...@google.com> Add net_offload flag NET_OFF_FLAG_GRO_OFF. If set, a net_offload will not be used for gro receive processing.
Also add sysctl helper proc_do_net_offload that toggles this flag and register sysctls net.{core,ipv4,ipv6}.gro Signed-off-by: Willem de Bruijn <will...@google.com> --- drivers/net/vxlan.c | 8 +++++ include/linux/netdevice.h | 7 ++++- net/core/dev.c | 1 + net/core/sysctl_net_core.c | 60 ++++++++++++++++++++++++++++++++++++++ net/ipv4/sysctl_net_ipv4.c | 7 +++++ net/ipv6/ip6_offload.c | 10 +++++-- net/ipv6/sysctl_net_ipv6.c | 8 +++++ 7 files changed, 97 insertions(+), 4 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index e5d236595206..8cb8e02c8ab6 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -572,6 +572,7 @@ static struct sk_buff *vxlan_gro_receive(struct sock *sk, struct list_head *head, struct sk_buff *skb) { + const struct net_offload *ops; struct sk_buff *pp = NULL; struct sk_buff *p; struct vxlanhdr *vh, *vh2; @@ -606,6 +607,12 @@ static struct sk_buff *vxlan_gro_receive(struct sock *sk, goto out; } + rcu_read_lock(); + ops = net_gro_receive(dev_offloads, ETH_P_TEB); + rcu_read_unlock(); + if (!ops) + goto out; + skb_gro_pull(skb, sizeof(struct vxlanhdr)); /* pull vxlan header */ list_for_each_entry(p, head, list) { @@ -621,6 +628,7 @@ static struct sk_buff *vxlan_gro_receive(struct sock *sk, } pp = call_gro_receive(eth_gro_receive, head, skb); + flush = 0; out: diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b9e671887fc2..93e8c9ade593 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2377,6 +2377,10 @@ struct net_offload { /* This should be set for any extension header which is compatible with GSO. 
*/
 #define INET6_PROTO_GSO_EXTHDR	0x1
+#define NET_OFF_FLAG_GRO_OFF	0x2
+
+int proc_do_net_offload(struct ctl_table *ctl, int write, void __user *buffer,
+			size_t *lenp, loff_t *ppos);
 
 /* often modified stats are per-CPU, other are shared (netdev->stats) */
 struct pcpu_sw_netstats {
@@ -3583,7 +3587,8 @@ net_gro_receive(struct net_offload __rcu **offs, u16 type)
 
 	off = rcu_dereference(offs[net_offload_from_type(type)]);
 	if (off && off->callbacks.gro_receive &&
-	    (!off->type || off->type == type))
+	    (!off->type || off->type == type) &&
+	    !(off->flags & NET_OFF_FLAG_GRO_OFF))
 		return off;
 	else
 		return NULL;
diff --git a/net/core/dev.c b/net/core/dev.c
index 20d9552afd38..0fd5273bc931 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -154,6 +154,7 @@
 #define GRO_MAX_HEAD (MAX_HEADER + 128)
 
 static DEFINE_SPINLOCK(ptype_lock);
+DEFINE_SPINLOCK(offload_lock);
 struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
 struct list_head ptype_all __read_mostly;	/* Taps */
 static struct list_head offload_base __read_mostly;
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index b1a2c5e38530..d2d72afdd9eb 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -15,6 +15,7 @@
 #include <linux/vmalloc.h>
 #include <linux/init.h>
 #include <linux/slab.h>
+#include <linux/bitmap.h>
 
 #include <net/ip.h>
 #include <net/sock.h>
@@ -34,6 +35,84 @@ static int net_msg_warn;	/* Unused, but still a sysctl */
 int sysctl_fb_tunnels_only_for_init_net __read_mostly = 0;
 EXPORT_SYMBOL(sysctl_fb_tunnels_only_for_init_net);
 
+extern spinlock_t offload_lock;
+
+#define NET_OFF_TBL_LEN	256
+
+/**
+ * proc_do_net_offload - sysctl handler for a per-protocol net_offload flag
+ * @ctl: sysctl entry; ->extra1 is the offload table, ->extra2 the flag
+ * @write: non-zero when the sysctl is being written
+ * @buffer: user space buffer, passed through to proc_do_large_bitmap()
+ * @lenp: length of @buffer, passed through to proc_do_large_bitmap()
+ * @ppos: file position, passed through to proc_do_large_bitmap()
+ *
+ * Bit i of the exported bitmap mirrors the flag in entry i of the table.
+ * On a successful write the flag is set or cleared in every registered
+ * entry; empty table slots are skipped.
+ *
+ * Return: 0 on success, else the error from proc_do_large_bitmap().
+ */
+int proc_do_net_offload(struct ctl_table *ctl, int write, void __user *buffer,
+			size_t *lenp, loff_t *ppos)
+{
+	DECLARE_BITMAP(bitmap, NET_OFF_TBL_LEN);
+	/* proc_do_large_bitmap() reads ->data as a pointer to the bitmap pointer */
+	unsigned long *bitmap_ptr = bitmap;
+	struct ctl_table tbl = {
+		.maxlen = NET_OFF_TBL_LEN,
+		.data = &bitmap_ptr,
+	};
+	unsigned long flag = (unsigned long) ctl->extra2;
+	struct net_offload __rcu **offs = ctl->extra1;
+	struct net_offload *off;
+	int i, ret;
+
+	bitmap_zero(bitmap, NET_OFF_TBL_LEN);
+
+	/* snapshot the current flag state into the local bitmap */
+	spin_lock(&offload_lock);
+	for (i = 0; i < NET_OFF_TBL_LEN; i++) {
+		off = rcu_dereference_protected(offs[i],
+						lockdep_is_held(&offload_lock));
+		if (!off || !(off->flags & flag))
+			continue;
+
+		/* gro disable bit: only if can gro */
+		if (flag == NET_OFF_FLAG_GRO_OFF &&
+		    !off->callbacks.gro_receive &&
+		    !(off->flags & INET6_PROTO_GSO_EXTHDR))
+			continue;
+
+		__set_bit(i, bitmap);
+	}
+	spin_unlock(&offload_lock);
+
+	/* copies to/from user space and may sleep: call without the lock held */
+	ret = proc_do_large_bitmap(&tbl, write, buffer, lenp, ppos);
+	if (!write || ret)
+		return ret;
+
+	spin_lock(&offload_lock);
+	for (i = 0; i < NET_OFF_TBL_LEN; i++) {
+		bool isset = test_bit(i, bitmap);
+
+		off = rcu_dereference_protected(offs[i],
+						lockdep_is_held(&offload_lock));
+		if (!off)
+			continue;
+
+		if (!isset && (off->flags & flag))
+			off->flags &= ~flag;
+		else if (isset && !(off->flags & flag))
+			off->flags |= flag;
+	}
+	spin_unlock(&offload_lock);
+
+	return 0;
+}
+EXPORT_SYMBOL(proc_do_net_offload);
+
 #ifdef CONFIG_RPS
 static int rps_sock_flow_sysctl(struct ctl_table *table, int write,
 				void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -435,6 +514,13 @@ static struct ctl_table net_core_table[] = {
 		.extra1		= &zero,
 		.extra2		= &one
 	},
+	{
+		.procname	= "gro",
+		.mode		= 0644,
+		.proc_handler	= proc_do_net_offload,
+		.extra1		= dev_offloads,
+		.extra2		= (void *) NET_OFF_FLAG_GRO_OFF,
+	},
 #ifdef CONFIG_RPS
 	{
 		.procname	= "rps_sock_flow_entries",
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index b92f422f2fa8..7a525039afb2 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -477,6 +477,13 @@ static struct ctl_table ipv4_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec
 	},
+	{
+		.procname	= "gro",
+		.mode		= 0644,
+		.proc_handler	= proc_do_net_offload,
+		.extra1		= inet_offloads,
+		.extra2		= (void *) NET_OFF_FLAG_GRO_OFF,
+	},
 #ifdef CONFIG_NETLABEL
 	{
 		.procname	= "cipso_cache_enable",
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index 2d0ea3f453f2..6be5adbd2ce7 100644
---
a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -20,7 +20,7 @@ #include "ip6_offload.h" -static int ipv6_gso_pull_exthdrs(struct sk_buff *skb, int proto) +static int ipv6_gso_pull_exthdrs(struct sk_buff *skb, int proto, bool is_gro) { for (;;) { struct ipv6_opt_hdr *opth; @@ -30,6 +30,10 @@ static int ipv6_gso_pull_exthdrs(struct sk_buff *skb, int proto) INET6_PROTO_GSO_EXTHDR)) break; + if (is_gro && !net_offload_has_flag(inet6_offloads, proto, + NET_OFF_FLAG_GRO_OFF)) + break; + if (unlikely(!pskb_may_pull(skb, 8))) break; @@ -76,7 +80,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, __skb_pull(skb, sizeof(*ipv6h)); segs = ERR_PTR(-EPROTONOSUPPORT); - proto = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr); + proto = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr, false); if (skb->encapsulation && skb_shinfo(skb)->gso_type & (SKB_GSO_IPXIP4 | SKB_GSO_IPXIP6)) @@ -188,7 +192,7 @@ static struct sk_buff *ipv6_gro_receive(struct list_head *head, if (!ops) { __pskb_pull(skb, skb_gro_offset(skb)); skb_gro_frag0_invalidate(skb); - proto = ipv6_gso_pull_exthdrs(skb, proto); + proto = ipv6_gso_pull_exthdrs(skb, proto, true); skb_gro_pull(skb, -skb_transport_offset(skb)); skb_reset_transport_header(skb); __skb_push(skb, skb_gro_offset(skb)); diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index e15cd37024fd..83f14962a909 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -20,6 +20,7 @@ #ifdef CONFIG_NETLABEL #include <net/calipso.h> #endif +#include <net/protocol.h> static int zero; static int one = 1; @@ -178,6 +179,13 @@ static struct ctl_table ipv6_rotable[] = { .proc_handler = proc_dointvec_minmax, .extra1 = &one }, + { + .procname = "gro", + .mode = 0644, + .proc_handler = proc_do_net_offload, + .extra1 = inet6_offloads, + .extra2 = (void *) NET_OFF_FLAG_GRO_OFF, + }, #ifdef CONFIG_NETLABEL { .procname = "calipso_cache_enable", -- 2.19.0.397.gdd90340f6a-goog