> Date: Tue, 3 Jan 2017 17:13:27 +0100 > From: Martin Pieuchot <m...@openbsd.org> > > It seems that most of the problems exposed by the introduction of the > NET_LOCK() are related to the non-recursive nature of the rwlock. Some > known issues involve pflow(4), cloning interfaces an NFS. > > Diff below makes use of a recursive-rwlock instead. I just finished a > build on NFS with it, so I'd like to know if it works for your use case. > > Please test and report back.
This does not fix the chrome hang that I was seeing. > Index: kern/kern_rwlock.c > =================================================================== > RCS file: /cvs/src/sys/kern/kern_rwlock.c,v > retrieving revision 1.27 > diff -u -p -r1.27 kern_rwlock.c > --- kern/kern_rwlock.c 14 Mar 2015 07:33:42 -0000 1.27 > +++ kern/kern_rwlock.c 3 Jan 2017 14:23:56 -0000 > @@ -346,3 +346,23 @@ rrw_status(struct rrwlock *rrwl) > { > return (rw_status(&rrwl->rrwl_lock)); > } > + > +int > +rrw_release_all(struct rrwlock *rrwl) > +{ > + int rv; > + > + rv = rrwl->rrwl_wcnt; > + KASSERT(rv >= 1); > + rrwl->rrwl_wcnt = 1; > + rrw_exit(rrwl); > + > + return (rv); > +} > + > +void > +rrw_acquire_count(struct rrwlock *rrwl, int count, int flags) > +{ > + while (count--) > + rrw_enter(rrwl, flags); > +} > Index: kern/kern_synch.c > =================================================================== > RCS file: /cvs/src/sys/kern/kern_synch.c,v > retrieving revision 1.135 > diff -u -p -r1.135 kern_synch.c > --- kern/kern_synch.c 13 Sep 2016 08:32:44 -0000 1.135 > +++ kern/kern_synch.c 3 Jan 2017 14:07:23 -0000 > @@ -231,27 +231,28 @@ msleep(const volatile void *ident, struc > * entered the sleep queue we drop the it. After sleeping we re-lock. > */ > int > -rwsleep(const volatile void *ident, struct rwlock *wl, int priority, > +rrwsleep(const volatile void *ident, struct rrwlock *rrwl, int priority, > const char *wmesg, int timo) > { > struct sleep_state sls; > int error, error1; > + int count; > > KASSERT((priority & ~(PRIMASK | PCATCH | PNORELOCK)) == 0); > - rw_assert_wrlock(wl); > + KASSERT(rrw_status(rrwl) == RW_WRITE); > > sleep_setup(&sls, ident, priority, wmesg); > sleep_setup_timeout(&sls, timo); > sleep_setup_signal(&sls, priority); > > - rw_exit_write(wl); > + count = rrw_release_all(rrwl); > > sleep_finish(&sls, 1); > error1 = sleep_finish_timeout(&sls); > error = sleep_finish_signal(&sls); > > if ((priority & PNORELOCK) == 0) > - rw_enter_write(wl); > + rrw_acquire_count(rrwl, count, RW_WRITE); > > /* Signal errors are higher priority than timeouts. */ > if (error == 0 && error1 != 0) > Index: kern/uipc_socket.c > =================================================================== > RCS file: /cvs/src/sys/kern/uipc_socket.c,v > retrieving revision 1.171 > diff -u -p -r1.171 uipc_socket.c > --- kern/uipc_socket.c 29 Dec 2016 12:12:43 -0000 1.171 > +++ kern/uipc_socket.c 3 Jan 2017 14:04:42 -0000 > @@ -256,7 +256,7 @@ soclose(struct socket *so) > (so->so_state & SS_NBIO)) > goto drop; > while (so->so_state & SS_ISCONNECTED) { > - error = tsleep(&so->so_timeo, > + error = rrwsleep(&so->so_timeo, &netlock, > PSOCK | PCATCH, "netcls", > so->so_linger * hz); > if (error) > @@ -1039,7 +1039,7 @@ sorflush(struct socket *so) > struct sockbuf asb; > > sb->sb_flags |= SB_NOINTR; > - (void) sblock(sb, M_WAITOK, NULL); > + (void) sblock(sb, M_WAITOK, &netlock); > socantrcvmore(so); > sbunlock(sb); > asb = *sb; > Index: kern/uipc_socket2.c > =================================================================== > RCS file: /cvs/src/sys/kern/uipc_socket2.c,v > retrieving revision 1.70 > diff -u -p -r1.70 uipc_socket2.c > --- kern/uipc_socket2.c 29 Dec 2016 12:12:43 -0000 1.70 > +++ kern/uipc_socket2.c 3 Jan 2017 14:06:54 -0000 > @@ -53,7 +53,7 @@ u_long sb_max = SB_MAX; /* patchable */ > extern struct pool mclpools[]; > extern struct pool mbpool; > > -int sbsleep(struct sockbuf *, struct rwlock *); > +int sbsleep(struct sockbuf *, struct rrwlock *); > > /* > * Procedures to manipulate state flags of socket > @@ -276,18 +276,18 @@ sbwait(struct sockbuf *sb) > NET_ASSERT_LOCKED(); > > sb->sb_flagsintr |= SB_WAIT; > - return (tsleep(&sb->sb_cc, > + return (rrwsleep(&sb->sb_cc, &netlock, > (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "netio", > sb->sb_timeo)); > } > > int > -sbsleep(struct sockbuf *sb, struct rwlock *lock) > +sbsleep(struct sockbuf *sb, struct rrwlock *rrwl) > { > int error, prio = (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH; > > - if (lock != NULL) > - error = rwsleep(&sb->sb_flags, lock, prio, "netlck", 0); > + if (rrwl != NULL) > + error = rrwsleep(&sb->sb_flags, rrwl, prio, "netlck", 0); > else > error = tsleep(&sb->sb_flags, prio, "netlck", 0); > > @@ -295,7 +295,7 @@ sbsleep(struct sockbuf *sb, struct rwloc > } > > int > -sblock(struct sockbuf *sb, int wait, struct rwlock *lock) > +sblock(struct sockbuf *sb, int wait, struct rrwlock *rrwl) > { > int error; > > @@ -310,7 +310,7 @@ sblock(struct sockbuf *sb, int wait, str > > while (sb->sb_flags & SB_LOCK) { > sb->sb_flags |= SB_WANT; > - error = sbsleep(sb, lock); > + error = sbsleep(sb, rrwl); > if (error) > return (error); > } > Index: kern/uipc_syscalls.c > =================================================================== > RCS file: /cvs/src/sys/kern/uipc_syscalls.c,v > retrieving revision 1.144 > diff -u -p -r1.144 uipc_syscalls.c > --- kern/uipc_syscalls.c 29 Dec 2016 12:12:43 -0000 1.144 > +++ kern/uipc_syscalls.c 3 Jan 2017 14:04:26 -0000 > @@ -296,7 +296,7 @@ redo: > head->so_error = ECONNABORTED; > break; > } > - error = tsleep(&head->so_timeo, PSOCK | PCATCH, > + error = rrwsleep(&head->so_timeo, &netlock, PSOCK | PCATCH, > "netcon", 0); > if (error) { > goto bad; > @@ -436,7 +436,7 @@ sys_connect(struct proc *p, void *v, reg > } > NET_LOCK(s); > while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { > - error = tsleep(&so->so_timeo, PSOCK | PCATCH, > + error = rrwsleep(&so->so_timeo, &netlock, PSOCK | PCATCH, > "netcon2", 0); > if (error) { > if (error == EINTR || error == ERESTART) > Index: net/if.c > =================================================================== > RCS file: /cvs/src/sys/net/if.c,v > retrieving revision 1.470 > diff -u -p -r1.470 if.c > --- net/if.c 3 Jan 2017 13:11:55 -0000 1.470 > +++ net/if.c 3 Jan 2017 14:03:51 -0000 > @@ -229,6 +229,12 @@ struct taskq *softnettq; > > struct task if_input_task_locked = TASK_INITIALIZER(if_netisr, NULL); > > + > +/* > + * Serialize socket operations to ensure no new sleeping points > + * are introduced in IP output paths. > + */ > +struct rrwlock netlock = RRWLOCK_INITIALIZER("netlock"); > /* > * Network interface utility routines. > */ > Index: sys/rwlock.h > =================================================================== > RCS file: /cvs/src/sys/sys/rwlock.h,v > retrieving revision 1.20 > diff -u -p -r1.20 rwlock.h > --- sys/rwlock.h 21 Sep 2016 10:19:13 -0000 1.20 > +++ sys/rwlock.h 3 Jan 2017 14:03:32 -0000 > @@ -114,6 +114,8 @@ void rw_assert_unlocked(struct rwlock *) > #define rw_assert_unlocked(rwl) ((void)0) > #endif > > +#define RRWLOCK_INITIALIZER(name) { RWLOCK_INITIALIZER(name), 0 } > + > int rw_enter(struct rwlock *, int); > void rw_exit(struct rwlock *); > int rw_status(struct rwlock *); > @@ -122,6 +124,8 @@ void rrw_init(struct rrwlock *, char *); > int rrw_enter(struct rrwlock *, int); > void rrw_exit(struct rrwlock *); > int rrw_status(struct rrwlock *); > +int rrw_release_all(struct rrwlock *); > +void rrw_acquire_count(struct rrwlock *, int, int); > > #endif /* _KERNEL */ > > Index: sys/socketvar.h > =================================================================== > RCS file: /cvs/src/sys/sys/socketvar.h,v > retrieving revision 1.67 > diff -u -p -r1.67 socketvar.h > --- sys/socketvar.h 19 Dec 2016 08:36:50 -0000 1.67 > +++ sys/socketvar.h 3 Jan 2017 14:05:59 -0000 > @@ -222,7 +222,7 @@ struct rwlock; > * Unless SB_NOINTR is set on sockbuf, sleep is interruptible. > * Returns error without lock if sleep is interrupted. > */ > -int sblock(struct sockbuf *, int, struct rwlock *); > +int sblock(struct sockbuf *, int, struct rrwlock *); > > /* release lock on sockbuf sb */ > void sbunlock(struct sockbuf *); > Index: sys/systm.h > =================================================================== > RCS file: /cvs/src/sys/sys/systm.h,v > retrieving revision 1.121 > diff -u -p -r1.121 systm.h > --- sys/systm.h 29 Dec 2016 12:12:44 -0000 1.121 > +++ sys/systm.h 3 Jan 2017 14:07:33 -0000 > @@ -247,13 +247,14 @@ int sleep_finish_signal(struct sleep_sta > void sleep_queue_init(void); > > struct mutex; > -struct rwlock; > +struct rrwlock; > void wakeup_n(const volatile void *, int); > void wakeup(const volatile void *); > #define wakeup_one(c) wakeup_n((c), 1) > int tsleep(const volatile void *, int, const char *, int); > int msleep(const volatile void *, struct mutex *, int, const char*, int); > -int rwsleep(const volatile void *, struct rwlock *, int, const char *, int); > +int rrwsleep(const volatile void *, struct rrwlock *, int, const char *, > + int); > void yield(void); > > void wdog_register(int (*)(void *, int), void *); > @@ -291,18 +292,26 @@ int uiomove(void *, size_t, struct uio * > > #if defined(_KERNEL) > > +#include <sys/rwlock.h> > + > +extern struct rrwlock netlock; > + > #define NET_LOCK(s) > \ > do { \ > + rrw_enter(&netlock, RW_WRITE); \ > s = splsoftnet(); \ > } while (0) > > #define NET_UNLOCK(s) > \ > do { \ > splx(s); \ > + rrw_exit(&netlock); \ > } while (0) > > #define NET_ASSERT_LOCKED() > \ > do { \ > + if (rrw_status(&netlock) != RW_WRITE) \ > + splassert_fail(RW_WRITE, rrw_status(&netlock), __func__);\ > splsoftassert(IPL_SOFTNET); \ > } while (0) > > >