On 08/11/17(Wed) 12:22, Saša Nedvědický wrote:
> Hello,
>
> I think the recent diff should go in so I can pick it up into my tree.
> It's the same as what I have in my net/systm.h
>
> What I also found is that in some cases we are going to grab
> KERNEL_LOCK() while running in if_input_process().  This typically
> happens when we deal with multicast forwarding in ip_input_if():

This is a deliberate change and it is perfectly ok.  In that case the
KERNEL_LOCK() is just an indication that nobody did the work to say
whether the underlying code is safe to be executed without it.  In other
words, the NET_LOCK() does not protect any of the global multicast data
structures and I'd like to keep it that way.  Otherwise it becomes more
difficult to shrink the NET_LOCK().  However, if somebody wants to
remove the KERNEL_LOCK() from this code path, that would be great.

> diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c
> index c02e95de474..a35b3d6e533 100644
> --- a/sys/netinet/ip_input.c
> +++ b/sys/netinet/ip_input.c
> @@ -432,9 +432,12 @@ ip_input_if(struct mbuf **mp, int *offp, int nxt, int af, struct ifnet *ifp)
>  	 * as expected when ip_mforward() is called from
>  	 * ip_output().)
>  	 */
> +	NET_RUNLOCK();
> +	NET_LOCK();
>  	KERNEL_LOCK();
>  	error = ip_mforward(m, ifp);
>  	KERNEL_UNLOCK();
> +	NET_DOWNGRADE();
>  	if (error) {
>  		ipstat_inc(ips_cantforward);
>  		goto bad;
>
> I deliberately keep NET_RUNLOCK()/NET_LOCK() as separate operations,
> while the reverse one is joined into NET_DOWNGRADE().  The piece above
> comes from a larger diff, which I hope to send later today.  I'll be
> off-line then and will be back on Monday the 13th.
>
> O.K. sashan@
>
>
> 2017-11-08 11:50 GMT+01:00 Martin Pieuchot <m...@openbsd.org>:
> > On 08/11/17(Wed) 10:37, Martin Pieuchot wrote:
> >> We're at the stage where we want to run multiple parts of the Network
> >> Stack in parallel.  sashan@ is trying to get multiple network threads
> >> running to exercise parallelism in pf_test() and tb@ is trying to push
> >> the NET_LOCK() further down in the ioctl(2) path.
> >>
> >> However we want to be able to gradually select which code paths can be
> >> executed in parallel with the rest of the stack.  So the way to move
> >> forward is to introduce a read version of the NET_LOCK(), NET_RLOCK(),
> >> and change code paths on a case-by-case basis.
> >>
> >> The diff below does that for the input processing path.  Note that the
> >> 3 tasks below cannot run in parallel because they are scheduled on the
> >> same taskq.
> >>
> >> Regarding assert macros, NET_ASSERT_LOCKED() now requires either a
> >> read or a write lock.  I also introduced a NET_ASSERT_WLOCKED() to put
> >> in paths that are not ready to be run in parallel with the rest of the
> >> stack.
> >>
> >> Once that's in we can slowly convert existing NET_LOCK() to NET_WLOCK()
> >> to be explicit.
> >
> > I got bitten by rw_status() once again.  There's no way to tell that
> > the current thread is not holding a read lock.  But that's ok since
> > what we really need is to assert that we're not holding the write lock.
> >
> > Fixed diff below, ok?
> >
> > Index: net/if.c
> > ===================================================================
> > RCS file: /cvs/src/sys/net/if.c,v
> > retrieving revision 1.523
> > diff -u -p -r1.523 if.c
> > --- net/if.c	4 Nov 2017 16:58:46 -0000	1.523
> > +++ net/if.c	8 Nov 2017 10:11:13 -0000
> > @@ -905,7 +905,7 @@ if_input_process(void *xifidx)
> >  	 * to PF globals, pipex globals, unicast and multicast addresses
> >  	 * lists.
> >  	 */
> > -	NET_LOCK();
> > +	NET_RLOCK();
> >  	s = splnet();
> >  	while ((m = ml_dequeue(&ml)) != NULL) {
> >  		/*
> > @@ -922,7 +922,7 @@ if_input_process(void *xifidx)
> >  		m_freem(m);
> >  	}
> >  	splx(s);
> > -	NET_UNLOCK();
> > +	NET_RUNLOCK();
> >  out:
> >  	if_put(ifp);
> >  }
> > Index: netinet/ip_input.c
> > ===================================================================
> > RCS file: /cvs/src/sys/netinet/ip_input.c,v
> > retrieving revision 1.329
> > diff -u -p -r1.329 ip_input.c
> > --- netinet/ip_input.c	5 Nov 2017 13:19:59 -0000	1.329
> > +++ netinet/ip_input.c	8 Nov 2017 10:03:54 -0000
> > @@ -1814,11 +1814,11 @@ ip_send_dispatch(void *xmq)
> >  	if (ml_empty(&ml))
> >  		return;
> >
> > -	NET_LOCK();
> > +	NET_RLOCK();
> >  	while ((m = ml_dequeue(&ml)) != NULL) {
> >  		ip_output(m, NULL, NULL, 0, NULL, NULL, 0);
> >  	}
> > -	NET_UNLOCK();
> > +	NET_RUNLOCK();
> >  }
> >
> >  void
> > Index: netinet6/ip6_input.c
> > ===================================================================
> > RCS file: /cvs/src/sys/netinet6/ip6_input.c,v
> > retrieving revision 1.207
> > diff -u -p -r1.207 ip6_input.c
> > --- netinet6/ip6_input.c	1 Nov 2017 06:35:38 -0000	1.207
> > +++ netinet6/ip6_input.c	8 Nov 2017 10:03:54 -0000
> > @@ -1459,7 +1459,7 @@ ip6_send_dispatch(void *xmq)
> >  	if (ml_empty(&ml))
> >  		return;
> >
> > -	NET_LOCK();
> > +	NET_RLOCK();
> >  	while ((m = ml_dequeue(&ml)) != NULL) {
> >  		/*
> >  		 * To avoid a "too big" situation at an intermediate router and
> > @@ -1470,7 +1470,7 @@ ip6_send_dispatch(void *xmq)
> >  		 */
> >  		ip6_output(m, NULL, NULL, IPV6_MINMTU, NULL, NULL);
> >  	}
> > -	NET_UNLOCK();
> > +	NET_RUNLOCK();
> >  }
> >
> >  void
> > Index: sys/systm.h
> > ===================================================================
> > RCS file: /cvs/src/sys/sys/systm.h,v
> > retrieving revision 1.133
> > diff -u -p -r1.133 systm.h
> > --- sys/systm.h	11 Aug 2017 21:24:20 -0000	1.133
> > +++ sys/systm.h	8 Nov 2017 10:13:42 -0000
> > @@ -296,26 +296,36 @@ int uiomove(void *, size_t, struct uio *
> >
> >  extern struct rwlock netlock;
> >
> > -#define NET_LOCK()							\
> > -do {									\
> > -	rw_enter_write(&netlock);					\
> > -} while (0)
> > +#define NET_LOCK()		NET_WLOCK()
> > +#define NET_UNLOCK()		NET_WUNLOCK()
> > +#define NET_ASSERT_UNLOCKED()	NET_ASSERT_WUNLOCKED()
> > +
> > +
> > +#define NET_WLOCK()	do { rw_enter_write(&netlock); } while (0)
> > +#define NET_WUNLOCK()	do { rw_exit_write(&netlock); } while (0)
> >
> > -#define NET_UNLOCK()							\
> > +#define NET_ASSERT_WLOCKED()						\
> >  do {									\
> > -	rw_exit_write(&netlock);					\
> > +	int _s = rw_status(&netlock);					\
> > +	if (_s != RW_WRITE)						\
> > +		splassert_fail(RW_WRITE, _s, __func__);			\
> >  } while (0)
> >
> > -#define NET_ASSERT_LOCKED()						\
> > +#define NET_ASSERT_WUNLOCKED()						\
> >  do {									\
> > -	if (rw_status(&netlock) != RW_WRITE)				\
> > -		splassert_fail(RW_WRITE, rw_status(&netlock), __func__);\
> > +	int _s = rw_status(&netlock);					\
> > +	if (_s == RW_WRITE)						\
> > +		splassert_fail(0, RW_WRITE, __func__);			\
> >  } while (0)
> >
> > -#define NET_ASSERT_UNLOCKED()						\
> > +#define NET_RLOCK()	do { rw_enter_read(&netlock); } while (0)
> > +#define NET_RUNLOCK()	do { rw_exit_read(&netlock); } while (0)
> > +
> > +#define NET_ASSERT_LOCKED()						\
> >  do {									\
> > -	if (rw_status(&netlock) == RW_WRITE)				\
> > -		splassert_fail(0, rw_status(&netlock), __func__);	\
> > +	int _s = rw_status(&netlock);					\
> > +	if (_s != RW_WRITE && _s != RW_READ)				\
> > +		splassert_fail(RW_READ, _s, __func__);			\
> >  } while (0)
> >
> >  __returns_twice int
> > setjmp(label_t *);
>
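
As an illustration of the locking pattern discussed above, here is a
minimal sketch of the read -> write -> read transition that the
ip_mforward() hunk relies on, written with the generic rw_enter(9)
primitives on a stand-alone lock.  NET_DOWNGRADE() is not defined in any
of the diffs in this thread, so wrapping RW_DOWNGRADE as shown here is
only an assumption about one possible implementation, not sashan@'s
actual code:

#include <sys/param.h>
#include <sys/rwlock.h>

struct rwlock examplelock = RWLOCK_INITIALIZER("example");

void
example_upgrade_downgrade(void)
{
	/* Input processing runs with the shared (read) lock held. */
	rw_enter_read(&examplelock);

	/*
	 * An rwlock cannot be upgraded in place without risking a
	 * deadlock against another thread already waiting for the
	 * write lock, so the read lock is dropped first and the
	 * exclusive lock taken afterwards, just like the separate
	 * NET_RUNLOCK()/NET_LOCK() calls in the ip_mforward() hunk.
	 */
	rw_exit_read(&examplelock);
	rw_enter_write(&examplelock);

	/* ... work that still needs exclusive access ... */

	/*
	 * Going back down is different: the holder of the exclusive
	 * lock can convert it to a shared lock in one step, which is
	 * presumably what a joined NET_DOWNGRADE() would do.
	 */
	rw_enter(&examplelock, RW_DOWNGRADE);

	/* Continue as a reader, then release the shared lock. */
	rw_exit_read(&examplelock);
}

That asymmetry is why the upgrade is spelled as two separate operations
while the downgrade can be a single one: going up requires giving up the
lock, going down never does.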