Hi, syzkaller and witness found a bug in my pcb table mutex commit.
https://syzkaller.appspot.com/bug?id=90a4811c99d6a2df7b252971b754612ca632894d For multicast and broadcast packets udp_input() traverses the loop of all UDP PCBs. There it calls udp_sbappend() while holding the UDB table mutex. This results in sorwakeup() and finally kernel lock while holding a mutex. I use the same solution as for PCB notify. Collect the affected PCBs in a temporary list. This list is protected by exclusive net lock. When we unlock the protocol layer this has to be reconsidered. The loop for raw sockets is on my todo list. ok? bluhm Index: netinet/in_pcb.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.h,v retrieving revision 1.127 diff -u -p -r1.127 in_pcb.h --- netinet/in_pcb.h 21 Mar 2022 09:12:34 -0000 1.127 +++ netinet/in_pcb.h 21 Mar 2022 14:52:03 -0000 @@ -102,7 +102,7 @@ struct inpcb { LIST_ENTRY(inpcb) inp_hash; /* [t] local and foreign hash */ LIST_ENTRY(inpcb) inp_lhash; /* [t] local port hash */ TAILQ_ENTRY(inpcb) inp_queue; /* [t] inet PCB queue */ - SIMPLEQ_ENTRY(inpcb) inp_notify; /* [N] queue to notify PCB */ + SIMPLEQ_ENTRY(inpcb) inp_notify; /* [N] notify or udp append */ struct inpcbtable *inp_table; /* [I] inet queue/hash table */ union inpaddru inp_faddru; /* Foreign address. */ union inpaddru inp_laddru; /* Local address. */ Index: netinet/udp_usrreq.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/udp_usrreq.c,v retrieving revision 1.275 diff -u -p -r1.275 udp_usrreq.c --- netinet/udp_usrreq.c 21 Mar 2022 09:12:34 -0000 1.275 +++ netinet/udp_usrreq.c 21 Mar 2022 14:52:55 -0000 @@ -342,7 +342,8 @@ udp_input(struct mbuf **mp, int *offp, i } if (m->m_flags & (M_BCAST|M_MCAST)) { - struct inpcb *last; + SIMPLEQ_HEAD(, inpcb) inpcblist; + /* * Deliver a multicast or broadcast datagram to *all* sockets * for which the local and remote addresses and ports match @@ -363,8 +364,8 @@ udp_input(struct mbuf **mp, int *offp, i * Locate pcb(s) for datagram. * (Algorithm copied from raw_intr().) */ - last = NULL; - NET_ASSERT_LOCKED(); + NET_ASSERT_WLOCKED(); + SIMPLEQ_INIT(&inpcblist); mtx_enter(&udbtable.inpt_mtx); TAILQ_FOREACH(inp, &udbtable.inpt_queue, inp_queue) { if (inp->inp_socket->so_state & SS_CANTRCVMORE) @@ -419,16 +420,9 @@ udp_input(struct mbuf **mp, int *offp, i continue; } - if (last != NULL) { - struct mbuf *n; + in_pcbref(inp); + SIMPLEQ_INSERT_TAIL(&inpcblist, inp, inp_notify); - n = m_copym(m, 0, M_COPYALL, M_NOWAIT); - if (n != NULL) { - udp_sbappend(last, n, ip, ip6, iphlen, - uh, &srcsa.sa, 0); - } - } - last = inp; /* * Don't look for additional matches if this one does * not have either the SO_REUSEPORT or SO_REUSEADDR @@ -437,13 +431,13 @@ udp_input(struct mbuf **mp, int *offp, i * port. It assumes that an application will never * clear these options after setting them. */ - if ((last->inp_socket->so_options & (SO_REUSEPORT | + if ((inp->inp_socket->so_options & (SO_REUSEPORT | SO_REUSEADDR)) == 0) break; } mtx_leave(&udbtable.inpt_mtx); - if (last == NULL) { + if (SIMPLEQ_EMPTY(&inpcblist)) { /* * No matching pcb found; discard datagram. * (No need to send an ICMP Port Unreachable @@ -453,7 +447,20 @@ udp_input(struct mbuf **mp, int *offp, i goto bad; } - udp_sbappend(last, m, ip, ip6, iphlen, uh, &srcsa.sa, 0); + while ((inp = SIMPLEQ_FIRST(&inpcblist)) != NULL) { + struct mbuf *n; + + SIMPLEQ_REMOVE_HEAD(&inpcblist, inp_notify); + if (SIMPLEQ_EMPTY(&inpcblist)) + n = m; + else + n = m_copym(m, 0, M_COPYALL, M_NOWAIT); + if (n != NULL) { + udp_sbappend(inp, n, ip, ip6, iphlen, uh, + &srcsa.sa, 0); + } + in_pcbunref(inp); + } return IPPROTO_DONE; } /*