Test wanted: free unix domain (a.k.a SOCKET_LOCK())
dtucker@ reported an interesting recursion [0]. His trace shows that a thread executing unp_detach() MUST NOT be holding the NET_LOCK(). So here's a new version of my SOCKET_LOCK() diff that does exactly that. That means sofree(9) won't grab the NET_LOCK() for unix sockets which makes uipc_usrreq() completely NET_LOCK() free. Please test and report back. [0] https://marc.info/?l=openbsd-misc&m=148661605114230&w=2 Index: kern/sys_socket.c === RCS file: /cvs/src/sys/kern/sys_socket.c,v retrieving revision 1.28 diff -u -p -r1.28 sys_socket.c --- kern/sys_socket.c 31 Jan 2017 12:16:20 - 1.28 +++ kern/sys_socket.c 9 Feb 2017 11:21:44 - @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -127,10 +128,10 @@ soo_ioctl(struct file *fp, u_long cmd, c } if (IOCGROUP(cmd) == 'r') return (EOPNOTSUPP); - NET_LOCK(s); + SOCKET_LOCK(so, s); error = ((*so->so_proto->pr_usrreq)(so, PRU_CONTROL, (struct mbuf *)cmd, (struct mbuf *)data, (struct mbuf *)NULL, p)); - NET_UNLOCK(s); + SOCKET_UNLOCK(s); return (error); } @@ -187,10 +188,10 @@ soo_stat(struct file *fp, struct stat *u ub->st_mode |= S_IWUSR | S_IWGRP | S_IWOTH; ub->st_uid = so->so_euid; ub->st_gid = so->so_egid; - NET_LOCK(s); + SOCKET_LOCK(so, s); (void) ((*so->so_proto->pr_usrreq)(so, PRU_SENSE, (struct mbuf *)ub, NULL, NULL, p)); - NET_UNLOCK(s); + SOCKET_UNLOCK(s); return (0); } Index: kern/uipc_socket.c === RCS file: /cvs/src/sys/kern/uipc_socket.c,v retrieving revision 1.176 diff -u -p -r1.176 uipc_socket.c --- kern/uipc_socket.c 1 Feb 2017 20:59:47 - 1.176 +++ kern/uipc_socket.c 9 Feb 2017 11:21:44 - @@ -135,16 +135,16 @@ socreate(int dom, struct socket **aso, i so->so_egid = p->p_ucred->cr_gid; so->so_cpid = p->p_p->ps_pid; so->so_proto = prp; - NET_LOCK(s); + SOCKET_LOCK(so, s); error = (*prp->pr_usrreq)(so, PRU_ATTACH, NULL, (struct mbuf *)(long)proto, NULL, p); if (error) { so->so_state |= SS_NOFDREF; sofree(so); - NET_UNLOCK(s); + SOCKET_UNLOCK(s); return (error); } - NET_UNLOCK(s); + SOCKET_UNLOCK(s); *aso = so; return (0); } @@ -154,9 +154,9 @@ sobind(struct socket *so, struct mbuf *n { int s, error; - NET_LOCK(s); + SOCKET_LOCK(so, s); error = (*so->so_proto->pr_usrreq)(so, PRU_BIND, NULL, nam, NULL, p); - NET_UNLOCK(s); + SOCKET_UNLOCK(s); return (error); } @@ -171,11 +171,11 @@ solisten(struct socket *so, int backlog) if (isspliced(so) || issplicedback(so)) return (EOPNOTSUPP); #endif /* SOCKET_SPLICE */ - NET_LOCK(s); + SOCKET_LOCK(so, s); error = (*so->so_proto->pr_usrreq)(so, PRU_LISTEN, NULL, NULL, NULL, curproc); if (error) { - NET_UNLOCK(s); + SOCKET_UNLOCK(s); return (error); } if (TAILQ_FIRST(&so->so_q) == NULL) @@ -185,15 +185,13 @@ solisten(struct socket *so, int backlog) if (backlog < sominconn) backlog = sominconn; so->so_qlimit = backlog; - NET_UNLOCK(s); + SOCKET_UNLOCK(s); return (0); } void sofree(struct socket *so) { - NET_ASSERT_LOCKED(); - if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) return; if (so->so_head) { @@ -232,7 +230,7 @@ soclose(struct socket *so) struct socket *so2; int s, error = 0; - NET_LOCK(s); + SOCKET_LOCK(so, s); if (so->so_options & SO_ACCEPTCONN) { while ((so2 = TAILQ_FIRST(&so->so_q0)) != NULL) { (void) soqremque(so2, 0); @@ -256,7 +254,7 @@ soclose(struct socket *so) (so->so_state & SS_NBIO)) goto drop; while (so->so_state & SS_ISCONNECTED) { - error = rwsleep(&so->so_timeo, &netlock, + error = sosleep(so, &so->so_timeo, PSOCK | PCATCH, "netcls", so->so_linger * hz); if (error) @@ -276,7 +274,7 @@ discard: panic("soclose NOFDREF: so %p, so_type %d", so, so->so_type); so->so_state |= SS_NOFDREF; sofree(so); - NET_UNLOCK(s); + SOCKET_UNLOCK(s); return (error); } @@ -294,7 +292,7 @@ soaccept(struct socket *so, struct mbuf { int error = 0; - NET_ASSERT_LOCKED(); + SOCKET_ASSERT_LOCKED(so
Re: Test wanted: free unix domain (a.k.a SOCKET_LOCK())
On Thu, 09 Feb 2017 12:36:44 +0100, Martin Pieuchot wrote: > dtucker@ reported an interesting recursion [0]. His trace shows that > a thread executing unp_detach() MUST NOT be holding the NET_LOCK(). > > So here's a new version of my SOCKET_LOCK() diff that does exactly > that. That means sofree(9) won't grab the NET_LOCK() for unix sockets > which makes uipc_usrreq() completely NET_LOCK() free. The NET_ASSERT_UNLOCKED() in the PRU_BIND case in uipc_usrreq() appears to be superfluous since you've added an assert before the switch() too. - todd
Re: Test wanted: free unix domain (a.k.a SOCKET_LOCK())
On Thu, 09 Feb 2017 08:27:51 -0700, "Todd C. Miller" wrote: > On Thu, 09 Feb 2017 12:36:44 +0100, Martin Pieuchot wrote: > > > dtucker@ reported an interesting recursion [0]. His trace shows that > > a thread executing unp_detach() MUST NOT be holding the NET_LOCK(). > > > > So here's a new version of my SOCKET_LOCK() diff that does exactly > > that. That means sofree(9) won't grab the NET_LOCK() for unix sockets > > which makes uipc_usrreq() completely NET_LOCK() free. > > The NET_ASSERT_UNLOCKED() in the PRU_BIND case in uipc_usrreq() > appears to be superfluous since you've added an assert before the > switch() too. Also, since unp_connect() is only called via uipc_usrreq() there is no need for NET_ASSERT_UNLOCKED() in unp_connect(). - todd
Re: Test wanted: free unix domain (a.k.a SOCKET_LOCK())
On Thu, Feb 09, 2017 at 12:36:44PM +0100, Martin Pieuchot wrote: > [0] https://marc.info/?l=openbsd-misc&m=148661605114230&w=2 My test shows that unix doamin socket on NFS file system works. > void > sofree(struct socket *so) > { > - NET_ASSERT_LOCKED(); > - Could you keep an SOCKET_ASSERT_LOCKED(so) here? > @@ -232,7 +230,7 @@ soclose(struct socket *so) > struct socket *so2; > int s, error = 0; > > - NET_LOCK(s); > + SOCKET_LOCK(so, s); > if (so->so_options & SO_ACCEPTCONN) { > while ((so2 = TAILQ_FIRST(&so->so_q0)) != NULL) { > (void) soqremque(so2, 0); Here comes the call to soabort(so2). int soabort(struct socket *so) { NET_ASSERT_LOCKED(); So this must be an SOCKET_ASSERT_LOCKED(so). splassert: soabort: want 64 have 0 Starting stack trace... splassert_check(40,0,d09e5ddb,d09d5adf,d03cfed0) at splassert_check+0x78 splassert_check(40,d09e5ddb,d09e5ddb,d03cabb2,d97d2e68) at splassert_check+0x78 soabort(d9553484,1,0,0,0) at soabort+0x5c soclose(d9553ba4,0,2f39005a,d97d2e60,0) at soclose+0x188 soo_close(d97d20a0,d95c7ea0,2f39005a,0,0) at soo_close+0x1b fdrop(d97d20a0,d95c7ea0,f5786edc,d03a29df,d954472c) at fdrop+0x28 closef(d97d20a0,d95c7ea0,f5786f0c,d03a93e2,d0bca2c0) at closef+0xcc sys_close(d95c7ea0,f5786f5c,f5786f7c,0,f5786fa8) at sys_close+0x5a syscall() at syscall+0x250 > @@ -121,6 +121,9 @@ uipc_usrreq(struct socket *so, int req, > error = EINVAL; > goto release; > } > + > + NET_ASSERT_UNLOCKED(); > + > switch (req) { ... > case PRU_BIND: > - /* XXXSMP breaks atomicity */ > - rw_exit_write(&netlock); > + NET_ASSERT_UNLOCKED(); Do not assert twice. > unp_detach(struct unpcb *unp) > { > struct vnode *vp; > - > - NET_ASSERT_UNLOCKED(); > - > + There is a tab after the + > @@ -539,7 +526,7 @@ unp_connect(struct socket *so, struct mb > error = EPROTOTYPE; > goto bad; > } > - NET_LOCK(s); > + SOCKET_LOCK(so, s); > if (so->so_proto->pr_flags & PR_CONNREQUIRED) { > if ((so2->so_options & SO_ACCEPTCONN) == 0 || > (so3 = sonewconn(so2, 0)) == 0) { > @@ -564,7 +551,7 @@ unp_connect(struct socket *so, struct mb > } > error = unp_connect2(so, so2); > unlock: > - NET_UNLOCK(s); > + SOCKET_UNLOCK(s); > bad: The socket sould be a PF_LOCAL anyway, so SOCKET_LOCK() and SOCKET_UNLOCK() do nothing. Remove these lines. > +#define SOCKET_LOCK(so, s) \ > +do { \ > + if (so->so_proto->pr_domain->dom_family != PF_LOCAL)\ > + NET_LOCK(s);\ > + else\ > + s = -42;\ > +} while (0) > + > +#define SOCKET_UNLOCK(s) > \ > +do { \ > + if (s != -42) \ > + NET_UNLOCK(s); \ > +} while (0) The 42 looks like an evil hack. bluhm
Re: Test wanted: free unix domain (a.k.a SOCKET_LOCK())
On Thu, Feb 09, 2017 at 12:36:44PM +0100, Martin Pieuchot wrote: > Please test and report back. This diff triggers a deadlock in /usr/src/regress/sys/net/pf_divert. Several processes hang in netlock on the remote machine. root@ot2:.../~# ps axkl | grep netlock 0 27718 0 0 10 0 0 0 netlock DK??0:01.66 (softcloc 0 65641 0 0 10 0 0 0 netlock DK??0:01.66 (systq) 0 98536 0 0 10 0 0 0 netlock DK??0:23.32 (softnet) 0 29505 0 0 10 0 0 0 netlock DK??0:00.12 (pfpurge) 0 31797 57781 0 10 0 912 2932 netlock Ds??0:00.03 sshd: roo After the test timeout the machine recovers. run-regress-inet-args-icmp-to time SUDO= perl -I/usr/src/regress/sys/net/pf_divert /usr/src/regress/sys/net/pf_divert/remote.pl -f inet 10.188.81.21 10.188.81.188 ot2 /usr/src/regress/sys/net/pf_divert/args-icmp-to.pl Remote no 'Shutdown' in server.log after 20 seconds at /usr/src/regress/sys/net/pf_divert/remote.pl line 211. *** Error 255 in /usr/src/regress/sys/net/pf_divert (Makefile:136 'run-regress-inet-args-icmp-to') bluhm
Re: Test wanted: free unix domain (a.k.a SOCKET_LOCK())
The process holding the netlock is: 34831 184306 25445 0 30x80 netio perl ddb{0}> trace /p 0t184306 sleep_finish(d0bceda0,d09e1903,f5536d2c,d03ce235,db7e4514) at sleep_finish+0xb4 sleep_finish(f5536d3c,1,118,d09e5f10,0) at sleep_finish+0xb4 tsleep(db544cdc,118,d09e5f10,0,0,0,f5536dec,d0570a6e,db53ea40,c9303c25,f5536dbc ,d03aab39) at tsleep+0x14f sbwait(db544c8c,db544cdc,0,0,f5536ef4) at sbwait+0xa8 soreceive(db544c8c,f5536e94,f5536e6c,0,0) at soreceive+0x43d recvit(db6ee2a8,0,f5536ed8,cf7da9f0,f5536f7c) at recvit+0x122 sys_recvfrom(db6ee2a8,f5536f5c,f5536f7c,0,f5536fa8) at sys_recvfrom+0x90 syscall() at syscall+0x250 The line in soreceive() is here /usr/src/sys/kern/uipc_socket.c:736 sbunlock(&so->so_rcv); error = sbwait(so, &so->so_rcv); SOCKET_UNLOCK(s); sbwait() should call rwsleep() not tsleep(). On Thu, Feb 09, 2017 at 12:36:44PM +0100, Martin Pieuchot wrote: > +int > +sosleep(struct socket *so, void *ident, int prio, const char *wmesg, int > timo) > +{ > + if (so->so_proto->pr_protocol != PF_LOCAL) > + return rwsleep(ident, &netlock, prio, wmesg, timo); > + else > + return tsleep(ident, prio, wmesg, timo); > +} Here the check must be (so->so_proto->pr_domain->dom_family != PF_LOCAL). bluhm
Re: Test wanted: free unix domain (a.k.a SOCKET_LOCK())
On 10/02/17(Fri) 15:21, Alexander Bluhm wrote: > [..] > The 42 looks like an evil hack. It is. I couldn't come to a better solution without refactoring the pr_usrreq interface. I think that we still need to do it if we want to unlock the syscalls. But I'd argue that having something that work and allow people working on the forwarding path to make progress is good, even if it is a hack. That said I'm open to better solutions. Here's an updated diff that should fix all your previous comments and those of millert@. I also change the SOCKET_LOCK() macros into real functions to stop polluting . It also find it easier to follow the lock ordering between sockerbuffer and NET_LOCK(). Comments, oks? Index: kern/sys_socket.c === RCS file: /cvs/src/sys/kern/sys_socket.c,v retrieving revision 1.28 diff -u -p -r1.28 sys_socket.c --- kern/sys_socket.c 31 Jan 2017 12:16:20 - 1.28 +++ kern/sys_socket.c 13 Feb 2017 09:28:51 - @@ -127,10 +127,10 @@ soo_ioctl(struct file *fp, u_long cmd, c } if (IOCGROUP(cmd) == 'r') return (EOPNOTSUPP); - NET_LOCK(s); + s = solock(so); error = ((*so->so_proto->pr_usrreq)(so, PRU_CONTROL, (struct mbuf *)cmd, (struct mbuf *)data, (struct mbuf *)NULL, p)); - NET_UNLOCK(s); + sounlock(s); return (error); } @@ -187,10 +187,10 @@ soo_stat(struct file *fp, struct stat *u ub->st_mode |= S_IWUSR | S_IWGRP | S_IWOTH; ub->st_uid = so->so_euid; ub->st_gid = so->so_egid; - NET_LOCK(s); + s = solock(so); (void) ((*so->so_proto->pr_usrreq)(so, PRU_SENSE, (struct mbuf *)ub, NULL, NULL, p)); - NET_UNLOCK(s); + sounlock(s); return (0); } Index: kern/uipc_socket.c === RCS file: /cvs/src/sys/kern/uipc_socket.c,v retrieving revision 1.176 diff -u -p -r1.176 uipc_socket.c --- kern/uipc_socket.c 1 Feb 2017 20:59:47 - 1.176 +++ kern/uipc_socket.c 13 Feb 2017 09:29:45 - @@ -135,16 +135,16 @@ socreate(int dom, struct socket **aso, i so->so_egid = p->p_ucred->cr_gid; so->so_cpid = p->p_p->ps_pid; so->so_proto = prp; - NET_LOCK(s); + s = solock(so); error = (*prp->pr_usrreq)(so, PRU_ATTACH, NULL, (struct mbuf *)(long)proto, NULL, p); if (error) { so->so_state |= SS_NOFDREF; sofree(so); - NET_UNLOCK(s); + sounlock(s); return (error); } - NET_UNLOCK(s); + sounlock(s); *aso = so; return (0); } @@ -154,9 +154,9 @@ sobind(struct socket *so, struct mbuf *n { int s, error; - NET_LOCK(s); + s = solock(so); error = (*so->so_proto->pr_usrreq)(so, PRU_BIND, NULL, nam, NULL, p); - NET_UNLOCK(s); + sounlock(s); return (error); } @@ -171,11 +171,11 @@ solisten(struct socket *so, int backlog) if (isspliced(so) || issplicedback(so)) return (EOPNOTSUPP); #endif /* SOCKET_SPLICE */ - NET_LOCK(s); + s = solock(so); error = (*so->so_proto->pr_usrreq)(so, PRU_LISTEN, NULL, NULL, NULL, curproc); if (error) { - NET_UNLOCK(s); + sounlock(s); return (error); } if (TAILQ_FIRST(&so->so_q) == NULL) @@ -185,14 +185,14 @@ solisten(struct socket *so, int backlog) if (backlog < sominconn) backlog = sominconn; so->so_qlimit = backlog; - NET_UNLOCK(s); + sounlock(s); return (0); } void sofree(struct socket *so) { - NET_ASSERT_LOCKED(); + soassertlocked(so); if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) return; @@ -232,7 +232,7 @@ soclose(struct socket *so) struct socket *so2; int s, error = 0; - NET_LOCK(s); + s = solock(so); if (so->so_options & SO_ACCEPTCONN) { while ((so2 = TAILQ_FIRST(&so->so_q0)) != NULL) { (void) soqremque(so2, 0); @@ -256,7 +256,7 @@ soclose(struct socket *so) (so->so_state & SS_NBIO)) goto drop; while (so->so_state & SS_ISCONNECTED) { - error = rwsleep(&so->so_timeo, &netlock, + error = sosleep(so, &so->so_timeo, PSOCK | PCATCH, "netcls", so->so_linger * hz); if (error) @@ -276,14 +276,14 @@ discard: panic("soclose NOFDREF: so %p, so_type %d", so, so->so_type); so->so_state |= SS_NOFDREF; sofree(so); - NET_UNLOCK(s); + sounlock(s); return (error); } int soabort
Re: Test wanted: free unix domain (a.k.a SOCKET_LOCK())
On Mon, Feb 13, 2017 at 10:45:43AM +0100, Martin Pieuchot wrote: > On 10/02/17(Fri) 15:21, Alexander Bluhm wrote: > > The 42 looks like an evil hack. > It is. I couldn't come to a better solution without refactoring the > pr_usrreq interface. The only problem I see is sofree(). In kern/uipc_socket.c sofree(so) is called two times before unlock(s), so we cannot pass so to check so->so_proto->pr_domain->dom_family. There must be a better solution than 42, but this should not delay this diff. Add a comment that we have to fix it later. > I also change the SOCKET_LOCK() macros into real > functions to stop polluting . I would prefer if NET_LOCK(s) and solock(so) would look similar. So either s = netlock(); s = solock(so); or NET_LOCK(s); SOCKET_LOCK(so, s); > Comments, oks? I did run the regression tests. The same tests pass or fail with and without this diff. OK bluhm@ > Index: kern/sys_socket.c > === > RCS file: /cvs/src/sys/kern/sys_socket.c,v > retrieving revision 1.28 > diff -u -p -r1.28 sys_socket.c > --- kern/sys_socket.c 31 Jan 2017 12:16:20 - 1.28 > +++ kern/sys_socket.c 13 Feb 2017 09:28:51 - > @@ -127,10 +127,10 @@ soo_ioctl(struct file *fp, u_long cmd, c > } > if (IOCGROUP(cmd) == 'r') > return (EOPNOTSUPP); > - NET_LOCK(s); > + s = solock(so); > error = ((*so->so_proto->pr_usrreq)(so, PRU_CONTROL, > (struct mbuf *)cmd, (struct mbuf *)data, (struct mbuf *)NULL, p)); > - NET_UNLOCK(s); > + sounlock(s); > > return (error); > } > @@ -187,10 +187,10 @@ soo_stat(struct file *fp, struct stat *u > ub->st_mode |= S_IWUSR | S_IWGRP | S_IWOTH; > ub->st_uid = so->so_euid; > ub->st_gid = so->so_egid; > - NET_LOCK(s); > + s = solock(so); > (void) ((*so->so_proto->pr_usrreq)(so, PRU_SENSE, > (struct mbuf *)ub, NULL, NULL, p)); > - NET_UNLOCK(s); > + sounlock(s); > return (0); > } > > Index: kern/uipc_socket.c > === > RCS file: /cvs/src/sys/kern/uipc_socket.c,v > retrieving revision 1.176 > diff -u -p -r1.176 uipc_socket.c > --- kern/uipc_socket.c1 Feb 2017 20:59:47 - 1.176 > +++ kern/uipc_socket.c13 Feb 2017 09:29:45 - > @@ -135,16 +135,16 @@ socreate(int dom, struct socket **aso, i > so->so_egid = p->p_ucred->cr_gid; > so->so_cpid = p->p_p->ps_pid; > so->so_proto = prp; > - NET_LOCK(s); > + s = solock(so); > error = (*prp->pr_usrreq)(so, PRU_ATTACH, NULL, > (struct mbuf *)(long)proto, NULL, p); > if (error) { > so->so_state |= SS_NOFDREF; > sofree(so); > - NET_UNLOCK(s); > + sounlock(s); > return (error); > } > - NET_UNLOCK(s); > + sounlock(s); > *aso = so; > return (0); > } > @@ -154,9 +154,9 @@ sobind(struct socket *so, struct mbuf *n > { > int s, error; > > - NET_LOCK(s); > + s = solock(so); > error = (*so->so_proto->pr_usrreq)(so, PRU_BIND, NULL, nam, NULL, p); > - NET_UNLOCK(s); > + sounlock(s); > return (error); > } > > @@ -171,11 +171,11 @@ solisten(struct socket *so, int backlog) > if (isspliced(so) || issplicedback(so)) > return (EOPNOTSUPP); > #endif /* SOCKET_SPLICE */ > - NET_LOCK(s); > + s = solock(so); > error = (*so->so_proto->pr_usrreq)(so, PRU_LISTEN, NULL, NULL, NULL, > curproc); > if (error) { > - NET_UNLOCK(s); > + sounlock(s); > return (error); > } > if (TAILQ_FIRST(&so->so_q) == NULL) > @@ -185,14 +185,14 @@ solisten(struct socket *so, int backlog) > if (backlog < sominconn) > backlog = sominconn; > so->so_qlimit = backlog; > - NET_UNLOCK(s); > + sounlock(s); > return (0); > } > > void > sofree(struct socket *so) > { > - NET_ASSERT_LOCKED(); > + soassertlocked(so); > > if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) > return; > @@ -232,7 +232,7 @@ soclose(struct socket *so) > struct socket *so2; > int s, error = 0; > > - NET_LOCK(s); > + s = solock(so); > if (so->so_options & SO_ACCEPTCONN) { > while ((so2 = TAILQ_FIRST(&so->so_q0)) != NULL) { > (void) soqremque(so2, 0); > @@ -256,7 +256,7 @@ soclose(struct socket *so) > (so->so_state & SS_NBIO)) > goto drop; > while (so->so_state & SS_ISCONNECTED) { > - error = rwsleep(&so->so_timeo, &netlock, > + error = sosleep(so, &so->so_timeo, > PSOCK | PCATCH, "netcls", > so->so_linger * hz); >