On Wed, Mar 27, 2024 at 08:17:33AM +0100, Anton Lindqvist wrote:
>
> Observing two regress hangs in the kernel on netio. Both seem to make use
> of unix sockets. Could this be the culprit?
>
> regress/lib/libc/fread
> regress/usr.bin/ssh (scp.sh)
Sorry for the delay. It turned out that `sb_mtx' should not be released between the `so_rcv' usage check and the corresponding sbwait() sleep; otherwise a wakeup() could occasionally be lost. This diff fixes the regress tests. It introduces sbunlock_locked() and sbwait_locked(), which are called with `sb_mtx' held. Index: sys/kern/uipc_socket.c =================================================================== RCS file: /cvs/src/sys/kern/uipc_socket.c,v retrieving revision 1.322 diff -u -p -r1.322 uipc_socket.c --- sys/kern/uipc_socket.c 26 Mar 2024 09:46:47 -0000 1.322 +++ sys/kern/uipc_socket.c 27 Mar 2024 19:17:52 -0000 @@ -834,6 +834,7 @@ bad: if (mp) *mp = NULL; +restart_unlocked: solock_shared(so); restart: if ((error = sblock(so, &so->so_rcv, SBLOCKWAIT(flags))) != 0) { @@ -903,12 +904,23 @@ restart: } SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 1"); SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1"); - sb_mtx_unlock(&so->so_rcv); - sbunlock(so, &so->so_rcv); - error = sbwait(so, &so->so_rcv); - if (error) { + + if (so->so_proto->pr_protocol == AF_UNIX) { + sbunlock_locked(so, &so->so_rcv); sounlock_shared(so); - return (error); + error = sbwait_locked(so, &so->so_rcv); + sb_mtx_unlock(&so->so_rcv); + if (error) + return (error); + goto restart_unlocked; + } else { + sb_mtx_unlock(&so->so_rcv); + sbunlock(so, &so->so_rcv); + error = sbwait(so, &so->so_rcv); + if (error) { + sounlock_shared(so); + return (error); + } } goto restart; } Index: sys/kern/uipc_socket2.c =================================================================== RCS file: /cvs/src/sys/kern/uipc_socket2.c,v retrieving revision 1.145 diff -u -p -r1.145 uipc_socket2.c --- sys/kern/uipc_socket2.c 26 Mar 2024 09:46:47 -0000 1.145 +++ sys/kern/uipc_socket2.c 27 Mar 2024 19:17:52 -0000 @@ -523,6 +523,18 @@ sbmtxassertlocked(struct socket *so, str * Wait for data to arrive at/drain from a socket buffer. */ int +sbwait_locked(struct socket *so, struct sockbuf *sb) +{ + int prio = (sb->sb_flags & SB_NOINTR) ? 
PSOCK : PSOCK | PCATCH; + + MUTEX_ASSERT_LOCKED(&sb->sb_mtx); + + sb->sb_flags |= SB_WAIT; + return msleep_nsec(&sb->sb_cc, &sb->sb_mtx, prio, "sbwait", + sb->sb_timeo_nsecs); +} + +int sbwait(struct socket *so, struct sockbuf *sb) { uint64_t timeo_nsecs; @@ -573,20 +585,23 @@ out: } void -sbunlock(struct socket *so, struct sockbuf *sb) +sbunlock_locked(struct socket *so, struct sockbuf *sb) { - int dowakeup = 0; + MUTEX_ASSERT_LOCKED(&sb->sb_mtx); - mtx_enter(&sb->sb_mtx); sb->sb_flags &= ~SB_LOCK; if (sb->sb_flags & SB_WANT) { sb->sb_flags &= ~SB_WANT; - dowakeup = 1; + wakeup(&sb->sb_flags); } - mtx_leave(&sb->sb_mtx); +} - if (dowakeup) - wakeup(&sb->sb_flags); +void +sbunlock(struct socket *so, struct sockbuf *sb) +{ + mtx_enter(&sb->sb_mtx); + sbunlock_locked(so, sb); + mtx_leave(&sb->sb_mtx); } /* Index: sys/sys/socketvar.h =================================================================== RCS file: /cvs/src/sys/sys/socketvar.h,v retrieving revision 1.126 diff -u -p -r1.126 socketvar.h --- sys/sys/socketvar.h 26 Mar 2024 09:46:47 -0000 1.126 +++ sys/sys/socketvar.h 27 Mar 2024 19:17:53 -0000 @@ -320,6 +320,7 @@ int sblock(struct socket *, struct sockb /* release lock on sockbuf sb */ void sbunlock(struct socket *, struct sockbuf *); +void sbunlock_locked(struct socket *, struct sockbuf *); #define SB_EMPTY_FIXUP(sb) do { \ if ((sb)->sb_mb == NULL) { \ @@ -367,6 +368,7 @@ int sbcheckreserve(u_long, u_long); int sbchecklowmem(void); int sbreserve(struct socket *, struct sockbuf *, u_long); int sbwait(struct socket *, struct sockbuf *); +int sbwait_locked(struct socket *, struct sockbuf *); void soinit(void); void soabort(struct socket *); int soaccept(struct socket *, struct mbuf *);