Re: [PATCH net-next RFC 6/8] net: make gro configurable

2018-09-14 Thread Willem de Bruijn
On Fri, Sep 14, 2018 at 1:59 PM Willem de Bruijn
 wrote:
>
> From: Willem de Bruijn 
>
> Add net_offload flag NET_OFF_FLAG_GRO_OFF. If set, a net_offload will
> not be used for gro receive processing.
>
> Also add sysctl helper proc_do_net_offload that toggles this flag and
> register sysctls net.{core,ipv4,ipv6}.gro
>
> Signed-off-by: Willem de Bruijn 
> ---
> diff --git a/net/core/dev.c b/net/core/dev.c
> index 20d9552afd38..0fd5273bc931 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -154,6 +154,7 @@
>  #define GRO_MAX_HEAD (MAX_HEADER + 128)
>
>  static DEFINE_SPINLOCK(ptype_lock);
> +DEFINE_SPINLOCK(offload_lock);
>  struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
>  struct list_head ptype_all __read_mostly;  /* Taps */
>  static struct list_head offload_base __read_mostly;
> diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
> index b1a2c5e38530..d2d72afdd9eb 100644
> --- a/net/core/sysctl_net_core.c
> +++ b/net/core/sysctl_net_core.c
> @@ -15,6 +15,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>
>  #include 
>  #include 
> @@ -34,6 +35,58 @@ static int net_msg_warn; /* Unused, but still a sysctl 
> */
>  int sysctl_fb_tunnels_only_for_init_net __read_mostly = 0;
>  EXPORT_SYMBOL(sysctl_fb_tunnels_only_for_init_net);
>
> +extern spinlock_t offload_lock;
> +
> +#define NET_OFF_TBL_LEN	256
> +
> +int proc_do_net_offload(struct ctl_table *ctl, int write, void __user 
> *buffer,
> +   size_t *lenp, loff_t *ppos)
> +{
> +   unsigned long bitmap[NET_OFF_TBL_LEN / (sizeof(unsigned long) << 3)];
> +   struct ctl_table tbl = { .maxlen = NET_OFF_TBL_LEN, .data = bitmap };
> +   unsigned long flag = (unsigned long) ctl->extra2;
> +   struct net_offload __rcu **offs = ctl->extra1;
> +   struct net_offload *off;
> +   int i, ret;
> +
> +   memset(bitmap, 0, sizeof(bitmap));
> +
> +   spin_lock(&offload_lock);
> +
> +   for (i = 0; i < tbl.maxlen; i++) {
> +   off = rcu_dereference_protected(offs[i], 
> lockdep_is_held(&offload_lock));
> +   if (off && off->flags & flag) {

This does not actually work as is. No protocol will have this flag set
out of the box.

I was in the middle of rewriting some of this when it became topical,
so I sent it out for discussion. It's bound not to be the only bug of
the patchset as is. I'll work through them to get it back in shape.


Re: [PATCH net-next RFC 6/8] net: make gro configurable

2018-09-14 Thread Willem de Bruijn
On Fri, Sep 14, 2018 at 6:50 PM Willem de Bruijn
 wrote:
>
> On Fri, Sep 14, 2018 at 2:39 PM Stephen Hemminger
>  wrote:
> >
> > On Fri, 14 Sep 2018 13:59:39 -0400
> > Willem de Bruijn  wrote:
> >
> > > diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
> > > index e5d236595206..8cb8e02c8ab6 100644
> > > --- a/drivers/net/vxlan.c
> > > +++ b/drivers/net/vxlan.c
> > > @@ -572,6 +572,7 @@ static struct sk_buff *vxlan_gro_receive(struct sock 
> > > *sk,
> > >struct list_head *head,
> > >struct sk_buff *skb)
> > >  {
> > > + const struct net_offload *ops;
> > >   struct sk_buff *pp = NULL;
> > >   struct sk_buff *p;
> > >   struct vxlanhdr *vh, *vh2;
> > > @@ -606,6 +607,12 @@ static struct sk_buff *vxlan_gro_receive(struct sock 
> > > *sk,
> > >   goto out;
> > >   }
> > >
> > > + rcu_read_lock();
> > > + ops = net_gro_receive(dev_offloads, ETH_P_TEB);
> > > + rcu_read_unlock();
> > > + if (!ops)
> > > + goto out;
> >
> > Isn't rcu_read_lock already held here?
> > RCU read lock is always held in the receive handler path
>
> There is a critical section on receive, taken in
> netif_receive_skb_core, but gro code runs before that. All the
> existing gro handlers call rcu_read_lock.

Though if dev_gro_receive is the entry point for all of gro, then
all other handlers are ensured to be executed within its rcu
readside section.


Re: [PATCH net-next RFC 6/8] net: make gro configurable

2018-09-14 Thread Willem de Bruijn
On Fri, Sep 14, 2018 at 2:39 PM Stephen Hemminger
 wrote:
>
> On Fri, 14 Sep 2018 13:59:39 -0400
> Willem de Bruijn  wrote:
>
> > diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
> > index e5d236595206..8cb8e02c8ab6 100644
> > --- a/drivers/net/vxlan.c
> > +++ b/drivers/net/vxlan.c
> > @@ -572,6 +572,7 @@ static struct sk_buff *vxlan_gro_receive(struct sock 
> > *sk,
> >struct list_head *head,
> >struct sk_buff *skb)
> >  {
> > + const struct net_offload *ops;
> >   struct sk_buff *pp = NULL;
> >   struct sk_buff *p;
> >   struct vxlanhdr *vh, *vh2;
> > @@ -606,6 +607,12 @@ static struct sk_buff *vxlan_gro_receive(struct sock 
> > *sk,
> >   goto out;
> >   }
> >
> > + rcu_read_lock();
> > + ops = net_gro_receive(dev_offloads, ETH_P_TEB);
> > + rcu_read_unlock();
> > + if (!ops)
> > + goto out;
>
> Isn't rcu_read_lock already held here?
> RCU read lock is always held in the receive handler path

There is a critical section on receive, taken in
netif_receive_skb_core, but gro code runs before that. All the
existing gro handlers call rcu_read_lock.

> > +
> >   skb_gro_pull(skb, sizeof(struct vxlanhdr)); /* pull vxlan header */
> >
> >   list_for_each_entry(p, head, list) {
> > @@ -621,6 +628,7 @@ static struct sk_buff *vxlan_gro_receive(struct sock 
> > *sk,
> >   }
> >
> >   pp = call_gro_receive(eth_gro_receive, head, skb);
> > +
> >   flush = 0;
>
> whitespace change crept into this patch.

Oops, thanks.


Re: [PATCH net-next RFC 6/8] net: make gro configurable

2018-09-14 Thread Stephen Hemminger
On Fri, 14 Sep 2018 13:59:39 -0400
Willem de Bruijn  wrote:

> diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
> index e5d236595206..8cb8e02c8ab6 100644
> --- a/drivers/net/vxlan.c
> +++ b/drivers/net/vxlan.c
> @@ -572,6 +572,7 @@ static struct sk_buff *vxlan_gro_receive(struct sock *sk,
>struct list_head *head,
>struct sk_buff *skb)
>  {
> + const struct net_offload *ops;
>   struct sk_buff *pp = NULL;
>   struct sk_buff *p;
>   struct vxlanhdr *vh, *vh2;
> @@ -606,6 +607,12 @@ static struct sk_buff *vxlan_gro_receive(struct sock *sk,
>   goto out;
>   }
>  
> + rcu_read_lock();
> + ops = net_gro_receive(dev_offloads, ETH_P_TEB);
> + rcu_read_unlock();
> + if (!ops)
> + goto out;

Isn't rcu_read_lock already held here?
RCU read lock is always held in the receive handler path

> +
>   skb_gro_pull(skb, sizeof(struct vxlanhdr)); /* pull vxlan header */
>  
>   list_for_each_entry(p, head, list) {
> @@ -621,6 +628,7 @@ static struct sk_buff *vxlan_gro_receive(struct sock *sk,
>   }
>  
>   pp = call_gro_receive(eth_gro_receive, head, skb);
> +
>   flush = 0;

whitespace change crept into this patch.


[PATCH net-next RFC 6/8] net: make gro configurable

2018-09-14 Thread Willem de Bruijn
From: Willem de Bruijn 

Add net_offload flag NET_OFF_FLAG_GRO_OFF. If set, a net_offload will
not be used for gro receive processing.

Also add sysctl helper proc_do_net_offload that toggles this flag and
register sysctls net.{core,ipv4,ipv6}.gro

Signed-off-by: Willem de Bruijn 
---
 drivers/net/vxlan.c|  8 +
 include/linux/netdevice.h  |  7 -
 net/core/dev.c |  1 +
 net/core/sysctl_net_core.c | 60 ++
 net/ipv4/sysctl_net_ipv4.c |  7 +
 net/ipv6/ip6_offload.c | 10 +--
 net/ipv6/sysctl_net_ipv6.c |  8 +
 7 files changed, 97 insertions(+), 4 deletions(-)

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index e5d236595206..8cb8e02c8ab6 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -572,6 +572,7 @@ static struct sk_buff *vxlan_gro_receive(struct sock *sk,
 struct list_head *head,
 struct sk_buff *skb)
 {
+   const struct net_offload *ops;
struct sk_buff *pp = NULL;
struct sk_buff *p;
struct vxlanhdr *vh, *vh2;
@@ -606,6 +607,12 @@ static struct sk_buff *vxlan_gro_receive(struct sock *sk,
goto out;
}
 
+   rcu_read_lock();
+   ops = net_gro_receive(dev_offloads, ETH_P_TEB);
+   rcu_read_unlock();
+   if (!ops)
+   goto out;
+
skb_gro_pull(skb, sizeof(struct vxlanhdr)); /* pull vxlan header */
 
list_for_each_entry(p, head, list) {
@@ -621,6 +628,7 @@ static struct sk_buff *vxlan_gro_receive(struct sock *sk,
}
 
pp = call_gro_receive(eth_gro_receive, head, skb);
+
flush = 0;
 
 out:
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index b9e671887fc2..93e8c9ade593 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2377,6 +2377,10 @@ struct net_offload {
 
 /* This should be set for any extension header which is compatible with GSO. */
 #define INET6_PROTO_GSO_EXTHDR 0x1
+#define NET_OFF_FLAG_GRO_OFF   0x2
+
+int proc_do_net_offload(struct ctl_table *ctl, int write, void __user *buffer,
+   size_t *lenp, loff_t *ppos);
 
 /* often modified stats are per-CPU, other are shared (netdev->stats) */
 struct pcpu_sw_netstats {
@@ -3583,7 +3587,8 @@ net_gro_receive(struct net_offload __rcu **offs, u16 type)
 
off = rcu_dereference(offs[net_offload_from_type(type)]);
if (off && off->callbacks.gro_receive &&
-   (!off->type || off->type == type))
+   (!off->type || off->type == type) &&
+   !(off->flags & NET_OFF_FLAG_GRO_OFF))
return off;
else
return NULL;
diff --git a/net/core/dev.c b/net/core/dev.c
index 20d9552afd38..0fd5273bc931 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -154,6 +154,7 @@
 #define GRO_MAX_HEAD (MAX_HEADER + 128)
 
 static DEFINE_SPINLOCK(ptype_lock);
+DEFINE_SPINLOCK(offload_lock);
 struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
 struct list_head ptype_all __read_mostly;  /* Taps */
 static struct list_head offload_base __read_mostly;
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index b1a2c5e38530..d2d72afdd9eb 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -15,6 +15,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -34,6 +35,58 @@ static int net_msg_warn; /* Unused, but still a sysctl */
 int sysctl_fb_tunnels_only_for_init_net __read_mostly = 0;
 EXPORT_SYMBOL(sysctl_fb_tunnels_only_for_init_net);
 
+extern spinlock_t offload_lock;
+
+#define NET_OFF_TBL_LEN	256
+
+int proc_do_net_offload(struct ctl_table *ctl, int write, void __user *buffer,
+   size_t *lenp, loff_t *ppos)
+{
+   unsigned long bitmap[NET_OFF_TBL_LEN / (sizeof(unsigned long) << 3)];
+   struct ctl_table tbl = { .maxlen = NET_OFF_TBL_LEN, .data = bitmap };
+   unsigned long flag = (unsigned long) ctl->extra2;
+   struct net_offload __rcu **offs = ctl->extra1;
+   struct net_offload *off;
+   int i, ret;
+
+   memset(bitmap, 0, sizeof(bitmap));
+
+   spin_lock(&offload_lock);
+
+   for (i = 0; i < tbl.maxlen; i++) {
+   off = rcu_dereference_protected(offs[i], 
lockdep_is_held(&offload_lock));
+   if (off && off->flags & flag) {
+   /* flag specific constraints */
+   if (flag == NET_OFF_FLAG_GRO_OFF) {
+   /* gro disable bit: only if can gro */
+   if (!off->callbacks.gro_receive &&
+   !(off->flags & INET6_PROTO_GSO_EXTHDR))
+   continue;
+   }
+   set_bit(i, bitmap);
+   }
+   }
+
+   ret = proc_do_large_bitmap(&tbl, write, buffer, lenp, ppos);
+
+   if (write && !ret) {