Re: Unlock IP forwarding paths

2017-05-30 Thread Martin Pieuchot
On 31/05/17(Wed) 00:20, Alexander Bluhm wrote:
> On Tue, May 30, 2017 at 10:12:39PM +0200, Alexander Bluhm wrote:
> > On Tue, May 30, 2017 at 10:04:14PM +0200, Alexander Bluhm wrote:
> > > On Tue, May 30, 2017 at 11:48:50AM +0200, Martin Pieuchot wrote:
> > > > Hrvoje Popovski found that ip{,6}_send_dispatch() also need the IPsec
> > > > dance.
> > > > 
> > > > Updated diff below.
> > > 
> > > I have tested this diff with my ipsec regress and a non-MP kernel.
> > > It crashed.
> > 
> > Same crash on i386 with GENERIC.MP.
> 
> Found it, you forgot to remove one if_put().  This fixes both crashes.

Thanks, I just committed it with this tweak.

> 
> bluhm
> 
> --- netinet6/ip6_input.c
> +++ netinet6/ip6_input.c
> @@ -500,7 +500,6 @@ ipv6_input(struct ifnet *ifp, struct mbuf *m)
>  #endif /* IPSEC */
>  
>   ip6_forward(m, rt, srcrt);
> - if_put(ifp);
>   return;
>   bad:
>   m_freem(m);
> 



Re: Unlock IP forwarding paths

2017-05-30 Thread Alexander Bluhm
On Tue, May 30, 2017 at 10:12:39PM +0200, Alexander Bluhm wrote:
> On Tue, May 30, 2017 at 10:04:14PM +0200, Alexander Bluhm wrote:
> > On Tue, May 30, 2017 at 11:48:50AM +0200, Martin Pieuchot wrote:
> > > Hrvoje Popovski found that ip{,6}_send_dispatch() also need the IPsec
> > > dance.
> > > 
> > > Updated diff below.
> > 
> > I have tested this diff with my ipsec regress and a non-MP kernel.
> > It crashed.
> 
> Same crash on i386 with GENERIC.MP.

Found it, you forgot to remove one if_put().  This fixes both crashes.

bluhm

--- netinet6/ip6_input.c
+++ netinet6/ip6_input.c
@@ -500,7 +500,6 @@ ipv6_input(struct ifnet *ifp, struct mbuf *m)
 #endif /* IPSEC */
 
ip6_forward(m, rt, srcrt);
-   if_put(ifp);
return;
  bad:
m_freem(m);



Re: Unlock IP forwarding paths

2017-05-30 Thread Alexander Bluhm
On Tue, May 30, 2017 at 10:04:14PM +0200, Alexander Bluhm wrote:
> On Tue, May 30, 2017 at 11:48:50AM +0200, Martin Pieuchot wrote:
> > Hrvoje Popovski found that ip{,6}_send_dispatch() also need the IPsec
> > dance.
> > 
> > Updated diff below.
> 
> I have tested this diff with my ipsec regress and a non-MP kernel.
> It crashed.

Same crash on i386 with GENERIC.MP.

bluhm

panic: kernel diagnostic assertion "refcnt != ~0" failed: file 
"/usr/src/sys/kern/kern_synch.c", line 682
Stopped at  db_enter+0x7:   leave
TIDPIDUID PRFLAGS PFLAGS  CPU  COMMAND
* 67643  67379  0 0x14000  0x2001  softnet
db_enter(d0a11c89,f5474e68,d09ec200,f5474e68,0) at db_enter+0x7
panic(d09ec200,d0968a27,d09e9c62,d09e9d1c,2aa) at panic+0x71
__assert(d0968a27,d09e9d1c,2aa,d09e9c62,86dd) at __assert+0x2e
refcnt_rele(d7721034,d76f2780,f5474eec,d03a9f52,d0bd2dc0) at refcnt_rele+0x48
refcnt_rele_wake(d7721034,d78aae00,0,f5474f0c,d0203039) at refcnt_rele_wake+0x12
if_input_process(1,f5474f68,d03bc3b0,f5474f90,d03a7a08) at if_input_process+0x13d
taskq_thread(d76f3080) at taskq_thread+0x60



Re: Unlock IP forwarding paths

2017-05-30 Thread Alexander Bluhm
On Tue, May 30, 2017 at 11:48:50AM +0200, Martin Pieuchot wrote:
> Hrvoje Popovski found that ip{,6}_send_dispatch() also need the IPsec
> dance.
> 
> Updated diff below.

I have tested this diff with my ipsec regress and a non-MP kernel.
It crashed.

bluhm

panic: kernel diagnostic assertion "refcnt != ~0" failed: file 
"/crypt/home/bluhm/openbsd/cvs/src/sys/kern/kern_synch.c", line 682
Stopped at  db_enter+0x9:   leave
TIDPIDUID PRFLAGS PFLAGS  CPU  COMMAND
*127376  98546  0 0x14000  0x2000  softnet
db_enter(817cd5ee,83999c70,10,83999c50,286,8) at db_enter+0x9
panic(817a2400,83999dc0,800d8290,0,800d8298,fff f83999d80) at panic+0x102
__assert(81703909,8179fc28,2aa,8179fb6a,ff000622980 c,800d8290) at __assert+0x35
refcnt_rele(800d8298,8135b754,83999e20,fe7a ,811ac575,83999de0) at refcnt_rele+0x45
refcnt_rele_wake(800d8298,ff0006ba4e00,8005f2a0,800 5f2a0,5,83999e40) at refcnt_rele_wake+0x18
if_input_process(2,83999eb0,0,0,83999eb0,80019040) at if_input_process+0xfa
taskq_thread(80019040,811b0fc0,80019040,811b0fc 0,0,0) at taskq_thread+0x69
end trace frame: 0x0, count: 8



Re: Unlock IP forwarding paths

2017-05-30 Thread Alexander Bluhm
On Tue, May 30, 2017 at 11:48:50AM +0200, Martin Pieuchot wrote:
> Updated diff below.

OK bluhm@

> Index: net/if.c
> ===
> RCS file: /cvs/src/sys/net/if.c,v
> retrieving revision 1.502
> diff -u -p -r1.502 if.c
> --- net/if.c  30 May 2017 07:50:37 -  1.502
> +++ net/if.c  30 May 2017 08:24:30 -
> @@ -874,7 +874,10 @@ if_input_process(void *xifidx)
>   struct ifnet *ifp;
>   struct ifih *ifih;
>   struct srp_ref sr;
> - int s;
> + int s, s2;
> +#ifdef IPSEC
> + int locked = 0;
> +#endif /* IPSEC */
>  
>   ifp = if_get(ifidx);
>   if (ifp == NULL)
> @@ -887,6 +890,32 @@ if_input_process(void *xifidx)
>   if (!ISSET(ifp->if_xflags, IFXF_CLONED))
>   add_net_randomness(ml_len(&ml));
>  
> +#ifdef IPSEC
> + /*
> +  * IPsec is not ready to run without KERNEL_LOCK().  So all
> +  * the traffic on your machine is punished if you have IPsec
> +  * enabled.
> +  */
> + extern int ipsec_in_use;
> + if (ipsec_in_use) {
> + KERNEL_LOCK();
> + locked = 1;
> + }
> +#endif /* IPSEC */
> +
> + /*
> +  * We grab the NET_LOCK() before processing any packet to
> +  * ensure there's no contention on the routing table lock.
> +  *
> +  * Without it we could race with a userland thread to insert
> +  * a L2 entry in ip{6,}_output().  Such race would result in
> +  * one of the threads sleeping *inside* the IP output path.
> +  *
> +  * Since we have a NET_LOCK() we also use it to serialize access
> +  * to PF globals, pipex globals, unicast and multicast addresses
> +  * lists.
> +  */
> + NET_LOCK(s2);
>   s = splnet();
>   while ((m = ml_dequeue(&ml)) != NULL) {
>   /*
> @@ -903,7 +932,12 @@ if_input_process(void *xifidx)
>   m_freem(m);
>   }
>   splx(s);
> + NET_UNLOCK(s2);
>  
> +#ifdef IPSEC
> + if (locked)
> + KERNEL_UNLOCK();
> +#endif /* IPSEC */
>  out:
>   if_put(ifp);
>  }
> Index: net/if_ethersubr.c
> ===
> RCS file: /cvs/src/sys/net/if_ethersubr.c,v
> retrieving revision 1.245
> diff -u -p -r1.245 if_ethersubr.c
> --- net/if_ethersubr.c30 May 2017 07:50:37 -  1.245
> +++ net/if_ethersubr.c30 May 2017 08:02:13 -
> @@ -416,15 +416,11 @@ decapsulate:
>  #ifdef PIPEX
>   if (pipex_enable) {
>   struct pipex_session *session;
> - int s;
>  
> - NET_LOCK(s);
>   if ((session = pipex_pppoe_lookup_session(m)) != NULL) {
>   pipex_pppoe_input(m, session);
> - NET_UNLOCK(s);
>   return (1);
>   }
> - NET_UNLOCK(s);
>   }
>  #endif
>   if (etype == ETHERTYPE_PPPOEDISC)
> Index: netinet/ip_input.c
> ===
> RCS file: /cvs/src/sys/netinet/ip_input.c,v
> retrieving revision 1.308
> diff -u -p -r1.308 ip_input.c
> --- netinet/ip_input.c30 May 2017 07:50:37 -  1.308
> +++ netinet/ip_input.c30 May 2017 09:44:53 -
> @@ -127,6 +127,7 @@ int ip_sysctl_ipstat(void *, size_t *, v
>  static struct mbuf_queue ipsend_mq;
>  
>  void ip_ours(struct mbuf *);
> +void ip_local(struct mbuf *);
>  int  ip_dooptions(struct mbuf *, struct ifnet *);
>  int  in_ouraddr(struct mbuf *, struct ifnet *, struct rtentry **);
>  
> @@ -207,27 +208,31 @@ ip_init(void)
>   mq_init(&ipsend_mq, 64, IPL_SOFTNET);
>  }
>  
> +/*
> + * Enqueue packet for local delivery.  Queuing is used as a boundary
> + * between the network layer (input/forward path) running without
> + * KERNEL_LOCK() and the transport layer still needing it.
> + */
>  void
> -ipv4_input(struct ifnet *ifp, struct mbuf *m)
> +ip_ours(struct mbuf *m)
>  {
>   niq_enqueue(&ipintrq, m);
>  }
>  
> +/*
> + * Dequeue and process locally delivered packets.
> + */
>  void
>  ipintr(void)
>  {
>   struct mbuf *m;
>  
> - /*
> -  * Get next datagram off input queue and get IP header
> -  * in first mbuf.
> -  */
>   while ((m = niq_dequeue(&ipintrq)) != NULL) {
> -#ifdef   DIAGNOSTIC
> +#ifdef DIAGNOSTIC
>   if ((m->m_flags & M_PKTHDR) == 0)
>   panic("ipintr no HDR");
>  #endif
> - ip_input(m);
> + ip_local(m);
>   }
>  }
>  
> @@ -237,18 +242,13 @@ ipintr(void)
>   * Checksum and byte swap header.  Process options. Forward or deliver.
>   */
>  void
> -ip_input(struct mbuf *m)
> +ipv4_input(struct ifnet *ifp, struct mbuf *m)
>  {
> - struct ifnet*ifp;
>   struct rtentry  *rt = NULL;
>   struct ip   *ip;
>   int hlen, len;
>   in_addr_t pfrdr = 0;
>  
> - if

Re: Unlock IP forwarding paths

2017-05-30 Thread Hrvoje Popovski
On 30.5.2017. 11:48, Martin Pieuchot wrote:
> On 30/05/17(Tue) 10:45, Martin Pieuchot wrote:
>> Diff below moves IPv4 & IPv6 incoming/forwarding path, PIPEX ppp
>> processing and IPv4 & IPv6 dispatch functions outside the KERNEL_LOCK().
>>
>> We currently rely on the NET_LOCK() serializing access to most global
>> data structures for that.  IP input queues are no longer used in the
>> forwarding case.  They still exist as boundary between the network and
>> transport layers because TCP/UDP & friends still need the KERNEL_LOCK().
>>
>> Since we do not want to grab the NET_LOCK() for every packet, the
>> softnet thread will do it once before processing a batch.  That means
>> the L2 processing path, which is currently running without lock, will
>> now run with the NET_LOCK().
>>
>> IPsec is the bridge of this layer.  A bad player.  Since IPsec isn't
>> ready to run without KERNEL_LOCK(), the softnet thread will grab the
>> KERNEL_LOCK() as soon as ``ipsec_in_use'' is set.
>>
>> I tried to document as much as possible the current design in my
>> commit messages and in the comment below.  Please ask if something
>> isn't clear.
> Hrvoje Popovski found that ip{,6}_send_dispatch() also need the IPsec
> dance.
> 
> Updated diff below.


i'm confirming that i can't reproduce panic with this diff ...



Re: Unlock IP forwarding paths

2017-05-30 Thread Martin Pieuchot
On 30/05/17(Tue) 10:45, Martin Pieuchot wrote:
> Diff below moves IPv4 & IPv6 incoming/forwarding path, PIPEX ppp
> processing and IPv4 & IPv6 dispatch functions outside the KERNEL_LOCK().
> 
> We currently rely on the NET_LOCK() serializing access to most global
> data structures for that.  IP input queues are no longer used in the
> forwarding case.  They still exist as boundary between the network and
> transport layers because TCP/UDP & friends still need the KERNEL_LOCK().
> 
> Since we do not want to grab the NET_LOCK() for every packet, the
> softnet thread will do it once before processing a batch.  That means
> the L2 processing path, which is currently running without lock, will
> now run with the NET_LOCK().
> 
> IPsec is the bridge of this layer.  A bad player.  Since IPsec isn't
> ready to run without KERNEL_LOCK(), the softnet thread will grab the
> KERNEL_LOCK() as soon as ``ipsec_in_use'' is set.
> 
> I tried to document as much as possible the current design in my
> commit messages and in the comment below.  Please ask if something
> isn't clear.

Hrvoje Popovski found that ip{,6}_send_dispatch() also need the IPsec
dance.

Updated diff below.

Index: net/if.c
===
RCS file: /cvs/src/sys/net/if.c,v
retrieving revision 1.502
diff -u -p -r1.502 if.c
--- net/if.c30 May 2017 07:50:37 -  1.502
+++ net/if.c30 May 2017 08:24:30 -
@@ -874,7 +874,10 @@ if_input_process(void *xifidx)
struct ifnet *ifp;
struct ifih *ifih;
struct srp_ref sr;
-   int s;
+   int s, s2;
+#ifdef IPSEC
+   int locked = 0;
+#endif /* IPSEC */
 
ifp = if_get(ifidx);
if (ifp == NULL)
@@ -887,6 +890,32 @@ if_input_process(void *xifidx)
if (!ISSET(ifp->if_xflags, IFXF_CLONED))
add_net_randomness(ml_len(&ml));
 
+#ifdef IPSEC
+   /*
+* IPsec is not ready to run without KERNEL_LOCK().  So all
+* the traffic on your machine is punished if you have IPsec
+* enabled.
+*/
+   extern int ipsec_in_use;
+   if (ipsec_in_use) {
+   KERNEL_LOCK();
+   locked = 1;
+   }
+#endif /* IPSEC */
+
+   /*
+* We grab the NET_LOCK() before processing any packet to
+* ensure there's no contention on the routing table lock.
+*
+* Without it we could race with a userland thread to insert
+* a L2 entry in ip{6,}_output().  Such race would result in
+* one of the threads sleeping *inside* the IP output path.
+*
+* Since we have a NET_LOCK() we also use it to serialize access
+* to PF globals, pipex globals, unicast and multicast addresses
+* lists.
+*/
+   NET_LOCK(s2);
s = splnet();
while ((m = ml_dequeue(&ml)) != NULL) {
/*
@@ -903,7 +932,12 @@ if_input_process(void *xifidx)
m_freem(m);
}
splx(s);
+   NET_UNLOCK(s2);
 
+#ifdef IPSEC
+   if (locked)
+   KERNEL_UNLOCK();
+#endif /* IPSEC */
 out:
if_put(ifp);
 }
Index: net/if_ethersubr.c
===
RCS file: /cvs/src/sys/net/if_ethersubr.c,v
retrieving revision 1.245
diff -u -p -r1.245 if_ethersubr.c
--- net/if_ethersubr.c  30 May 2017 07:50:37 -  1.245
+++ net/if_ethersubr.c  30 May 2017 08:02:13 -
@@ -416,15 +416,11 @@ decapsulate:
 #ifdef PIPEX
if (pipex_enable) {
struct pipex_session *session;
-   int s;
 
-   NET_LOCK(s);
if ((session = pipex_pppoe_lookup_session(m)) != NULL) {
pipex_pppoe_input(m, session);
-   NET_UNLOCK(s);
return (1);
}
-   NET_UNLOCK(s);
}
 #endif
if (etype == ETHERTYPE_PPPOEDISC)
Index: netinet/ip_input.c
===
RCS file: /cvs/src/sys/netinet/ip_input.c,v
retrieving revision 1.308
diff -u -p -r1.308 ip_input.c
--- netinet/ip_input.c  30 May 2017 07:50:37 -  1.308
+++ netinet/ip_input.c  30 May 2017 09:44:53 -
@@ -127,6 +127,7 @@ int ip_sysctl_ipstat(void *, size_t *, v
 static struct mbuf_queue   ipsend_mq;
 
 void   ip_ours(struct mbuf *);
+void   ip_local(struct mbuf *);
 intip_dooptions(struct mbuf *, struct ifnet *);
 intin_ouraddr(struct mbuf *, struct ifnet *, struct rtentry **);
 
@@ -207,27 +208,31 @@ ip_init(void)
mq_init(&ipsend_mq, 64, IPL_SOFTNET);
 }
 
+/*
+ * Enqueue packet for local delivery.  Queuing is used as a boundary
+ * between the network layer (input/forward path) running without
+ * KERNEL_LOCK() and the transport layer still needing it.
+ */
 void
-ipv4_input(struct ifnet *ifp, struct mbuf *m)
+ip_ours(str

Unlock IP forwarding paths

2017-05-30 Thread Martin Pieuchot
Diff below moves IPv4 & IPv6 incoming/forwarding path, PIPEX ppp
processing and IPv4 & IPv6 dispatch functions outside the KERNEL_LOCK().

We currently rely on the NET_LOCK() serializing access to most global
data structures for that.  IP input queues are no longer used in the
forwarding case.  They still exist as boundary between the network and
transport layers because TCP/UDP & friends still need the KERNEL_LOCK().

Since we do not want to grab the NET_LOCK() for every packet, the
softnet thread will do it once before processing a batch.  That means
the L2 processing path, which is currently running without lock, will
now run with the NET_LOCK().

IPsec is the bridge of this layer.  A bad player.  Since IPsec isn't
ready to run without KERNEL_LOCK(), the softnet thread will grab the
KERNEL_LOCK() as soon as ``ipsec_in_use'' is set.

I tried to document as much as possible the current design in my
commit messages and in the comment below.  Please ask if something
isn't clear.

Tests and ok welcome.

Index: net/if.c
===
RCS file: /cvs/src/sys/net/if.c,v
retrieving revision 1.502
diff -u -p -r1.502 if.c
--- net/if.c30 May 2017 07:50:37 -  1.502
+++ net/if.c30 May 2017 08:34:49 -
@@ -874,7 +874,10 @@ if_input_process(void *xifidx)
struct ifnet *ifp;
struct ifih *ifih;
struct srp_ref sr;
-   int s;
+   int s, s2;
+#ifdef IPSEC
+   int locked = 0;
+#endif /* IPSEC */
 
ifp = if_get(ifidx);
if (ifp == NULL)
@@ -887,6 +890,32 @@ if_input_process(void *xifidx)
if (!ISSET(ifp->if_xflags, IFXF_CLONED))
add_net_randomness(ml_len(&ml));
 
+#ifdef IPSEC
+   /*
+* IPsec is not ready to run without KERNEL_LOCK().  So all
+* the traffic on your machine is punished if you have IPsec
+* enabled.
+*/
+   extern int ipsec_in_use;
+   if (ipsec_in_use) {
+   KERNEL_LOCK();
+   locked = 1;
+   }
+#endif /* IPSEC */
+
+   /*
+* We grab the NET_LOCK() before processing any packet to
+* ensure there's no contention on the routing table lock.
+*
+* Without it we could race with a userland thread to insert
+* a L2 entry in ip{6,}_output().  Such race would result in
+* one of the threads sleeping *inside* the IP output path.
+*
+* Since we have a NET_LOCK() we also use it to serialize access
+* to PF globals, pipex globals, unicast and multicast addresses
+* lists.
+*/
+   NET_LOCK(s2);
s = splnet();
while ((m = ml_dequeue(&ml)) != NULL) {
/*
@@ -903,7 +932,12 @@ if_input_process(void *xifidx)
m_freem(m);
}
splx(s);
+   NET_UNLOCK(s2);
 
+#ifdef IPSEC
+   if (locked)
+   KERNEL_UNLOCK();
+#endif /* IPSEC */
 out:
if_put(ifp);
 }
Index: net/if_ethersubr.c
===
RCS file: /cvs/src/sys/net/if_ethersubr.c,v
retrieving revision 1.245
diff -u -p -r1.245 if_ethersubr.c
--- net/if_ethersubr.c  30 May 2017 07:50:37 -  1.245
+++ net/if_ethersubr.c  30 May 2017 08:34:49 -
@@ -416,15 +416,11 @@ decapsulate:
 #ifdef PIPEX
if (pipex_enable) {
struct pipex_session *session;
-   int s;
 
-   NET_LOCK(s);
if ((session = pipex_pppoe_lookup_session(m)) != NULL) {
pipex_pppoe_input(m, session);
-   NET_UNLOCK(s);
return (1);
}
-   NET_UNLOCK(s);
}
 #endif
if (etype == ETHERTYPE_PPPOEDISC)
Index: net/if_switch.c
===
RCS file: /cvs/src/sys/net/if_switch.c,v
retrieving revision 1.19
diff -u -p -r1.19 if_switch.c
--- net/if_switch.c 12 May 2017 13:40:29 -  1.19
+++ net/if_switch.c 30 May 2017 08:34:49 -
@@ -388,9 +388,8 @@ switch_ioctl(struct ifnet *ifp, unsigned
struct bstp_port*bp;
struct ifnet*ifs;
struct switch_port  *swpo;
-   int  s, error = 0;
+   int  error = 0;
 
-   s = splnet();
switch (cmd) {
case SIOCBRDGADD:
if ((error = suser(curproc, 0)) != 0)
@@ -481,7 +480,6 @@ switch_ioctl(struct ifnet *ifp, unsigned
break;
}
 
-   splx(s);
return (error);
 }
 
Index: netinet/ip_input.c
===
RCS file: /cvs/src/sys/netinet/ip_input.c,v
retrieving revision 1.308
diff -u -p -r1.308 ip_input.c
--- netinet/ip_input.c  30 May 2017 07:50:37 -  1.308
+++ netinet/ip_input.c  30 May 201