On Fri, Oct 25, 2024 at 07:03:57PM +0300, Vitaliy Makkoveev wrote:
> On Fri, Oct 25, 2024 at 04:48:25PM +0200, Alexander Bluhm wrote:
> > On Fri, Oct 25, 2024 at 11:52:13AM +0300, Vitaliy Makkoveev wrote:
> > > On Fri, Oct 25, 2024 at 10:18:42AM +0200, Holger Glaess wrote:
> > > > hi
> > > >
> > > > see below, this is from the first reboot after sysupgrade from 7.5 to 7.6
> > > >
> > > >
> > > > Holger
> > > >
> > > >
> > > > ===> Adding the _dhcp6leased user
> > > > panic: rw_enter: inpnotify locking against myself
> > > > Stopped at      db_enter+0x14:  popq    %rbp
> > > > ===> Adding the _dhcp6leased user
> > > >     TID    PID    UID     PRFLAGS     PFLAGS  CPU
> > > > COMMAND
> > > >  239340   5005      0    0x100803     0x2000    0K
> > > > grep
> > > > *432877  32323      0     0x14000      0x200    1
> > > > softnet0
> > > > db_enter() at db_enter+0x14
> > > > panic(ffffffff8233bb4f) at panic+0xdd
> > > > rw_enter_diag(ffffffff828bb170,1) at rw_enter_diag+0x4e
> > > > rw_enter(ffffffff828bb170,1) at rw_enter+0x103
> > > > udp_input(ffff800030da6ab8,ffff800030da6ac4,11,2) at udp_input+0x60f
> > > > ip_deliver(ffff800030da6ab8,ffff800030da6ac4,11,2,1) at ip_deliver+0xf8
> > > > ip_ours(ffff800030da6ab8,ffff800030da6ac4,ffff800030da6a0c,0) at
> > > > ip_ours+0x6f
> > > > ip_input_running
> > > > rc.sysmergeif(ffff800030da6ab8,ffff800030da6ac4,31,0,ffff8000008ab800)
> > > > at
> > > > ip_inpu
> > > > t_if+0x1f0
> > > > ipv4_input(ffff8000008ab800,fffffd807d870300) at i===> Adding the
> > > > _dhcp6leased grouppv4_input+0x38
> > > > ether_input(ffff8000008ab800,fffffd807d870300) at ether_input+0x3df
> > > > vxlan_input(ffff800001315680,fffffd807d870300,fffffd8074bcb050,0,fffffd8074bcb0
> > > > 64,1c) at vxlan_input+0x301
> > > > udp_sbappend(fffffd82779de000,fffffd807d870300,fffffd8074bcb050,0,14,fffffd8074
> > > > bcb064,cae52dbc4504571,14) at udp_sbappend+0x7f
> > > > udp_input(ffff800030da6fa8,ffff800030da6fb4,11,2) at udp_input+0x9c2
> > > > ip_deliver(ffff800030da6fa8,ffff800030da6fb4,11,2,1) at ip_deliver+0xf8
> > > > end trace frame: 0xffff800030da6eb0, count: 0
> > > > https://www.openbsd.org/ddb.html describes the minimum info required in
> > > > bug
> > > > reports.  Insufficient info makes it difficult to find and fix bugs.
> > > >
> > >
> > > This diff should help.
> >
> > The inpt_notify lock is there to lock the inp_notify field.
> > mvs@, we should not release it while traversing over inpcblist.
> >
>
> Yeah, missed that. On the other hand, we could use iterators like
> pipex_iterator() and avoid holding locks while doing udp_sbappend().
This is the diff with an iterator. To keep it small I converted only the
udp_input() loop. Holger, could you test it and confirm that it helps? I
tested it with udp(4) broadcasts, but I can't test it with vxlan(4).
Alexander, all inp_queue loops except the one in in_pcbresize() could be
converted in this way, so in_pcbresize() will be the only place where we
need to do the in_pcb_is_iterator() check. This iterator would also
simplify the netlock dances in sysctl_file(). Also, we no longer hold the
rwlock(9) while calling the handler.
Index: sys/kern/kern_sysctl.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_sysctl.c,v
diff -u -p -r1.448 kern_sysctl.c
--- sys/kern/kern_sysctl.c 30 Sep 2024 12:32:26 -0000 1.448
+++ sys/kern/kern_sysctl.c 25 Oct 2024 20:32:47 -0000
@@ -1673,34 +1673,52 @@ sysctl_file(int *name, u_int namelen, ch
NET_LOCK();
mtx_enter(&tcbtable.inpt_mtx);
- TAILQ_FOREACH(inp, &tcbtable.inpt_queue, inp_queue)
+ TAILQ_FOREACH(inp, &tcbtable.inpt_queue, inp_queue) {
+ if (in_pcb_is_iterator(inp))
+ continue;
FILLSO(inp->inp_socket);
+ }
mtx_leave(&tcbtable.inpt_mtx);
#ifdef INET6
mtx_enter(&tcb6table.inpt_mtx);
- TAILQ_FOREACH(inp, &tcb6table.inpt_queue, inp_queue)
+ TAILQ_FOREACH(inp, &tcb6table.inpt_queue, inp_queue) {
+ if (in_pcb_is_iterator(inp))
+ continue;
FILLSO(inp->inp_socket);
+ }
mtx_leave(&tcb6table.inpt_mtx);
#endif
mtx_enter(&udbtable.inpt_mtx);
- TAILQ_FOREACH(inp, &udbtable.inpt_queue, inp_queue)
+ TAILQ_FOREACH(inp, &udbtable.inpt_queue, inp_queue) {
+ if (in_pcb_is_iterator(inp))
+ continue;
FILLSO(inp->inp_socket);
+ }
mtx_leave(&udbtable.inpt_mtx);
#ifdef INET6
mtx_enter(&udb6table.inpt_mtx);
- TAILQ_FOREACH(inp, &udb6table.inpt_queue, inp_queue)
+ TAILQ_FOREACH(inp, &udb6table.inpt_queue, inp_queue) {
+ if (in_pcb_is_iterator(inp))
+ continue;
FILLSO(inp->inp_socket);
+ }
mtx_leave(&udb6table.inpt_mtx);
#endif
mtx_enter(&rawcbtable.inpt_mtx);
- TAILQ_FOREACH(inp, &rawcbtable.inpt_queue, inp_queue)
+ TAILQ_FOREACH(inp, &rawcbtable.inpt_queue, inp_queue) {
+ if (in_pcb_is_iterator(inp))
+ continue;
FILLSO(inp->inp_socket);
+ }
mtx_leave(&rawcbtable.inpt_mtx);
#ifdef INET6
mtx_enter(&rawin6pcbtable.inpt_mtx);
TAILQ_FOREACH(inp, &rawin6pcbtable.inpt_queue,
- inp_queue)
+ inp_queue) {
+ if (in_pcb_is_iterator(inp))
+ continue;
FILLSO(inp->inp_socket);
+ }
mtx_leave(&rawin6pcbtable.inpt_mtx);
#endif
NET_UNLOCK();
Index: sys/netinet/in_pcb.c
===================================================================
RCS file: /cvs/src/sys/netinet/in_pcb.c,v
diff -u -p -r1.303 in_pcb.c
--- sys/netinet/in_pcb.c 12 Jul 2024 19:50:35 -0000 1.303
+++ sys/netinet/in_pcb.c 25 Oct 2024 20:32:47 -0000
@@ -644,6 +644,39 @@ in_pcbunref(struct inpcb *inp)
pool_put(&inpcb_pool, inp);
}
+/*
+ * Walk table->inpt_queue one PCB at a time without keeping inpt_mtx held
+ * between steps.
+ *
+ * Pass inp == NULL to start the walk; on later calls pass the PCB that the
+ * previous call returned.  `iter' is a caller-provided marker which is
+ * linked into the queue right after the returned PCB, so the position is
+ * remembered while the mutex is released.
+ *
+ * The returned PCB has been referenced with in_pcbref(); the PCB passed in
+ * is released with in_pcbunref().  NULL is returned at the end of the
+ * queue, and in that case `iter' is not linked into the queue anymore.
+ * A caller that stops before NULL is returned must unlink `iter' itself:
+ * this function only removes it when it is called again.
+ */
+struct inpcb *
+in_pcb_iterator(struct inpcbtable *table, struct inpcb *inp,
+    struct inpcb_iterator *iter)
+{
+	struct inpcb *tmp;
+
+	mtx_enter(&table->inpt_mtx);
+
+	/*
+	 * Resume from the marker, not from `inp'.  The marker stays linked
+	 * for the whole walk, whereas `inp' may have been removed from the
+	 * queue while the mutex was not held, in which case its inp_queue
+	 * linkage can no longer be trusted.
+	 */
+	if (inp)
+		tmp = TAILQ_NEXT((struct inpcb *)iter, inp_queue);
+	else
+		tmp = TAILQ_FIRST(&table->inpt_queue);
+
+	/* Skip entries with a NULL inp_table, i.e. other iterator markers. */
+	while (tmp && tmp->inp_table == NULL)
+		tmp = TAILQ_NEXT(tmp, inp_queue);
+
+	/* The marker is only linked once a previous call returned a PCB. */
+	if (inp) {
+		TAILQ_REMOVE(&table->inpt_queue, (struct inpcb *)iter,
+		    inp_queue);
+	}
+	if (tmp) {
+		TAILQ_INSERT_AFTER(&table->inpt_queue, tmp,
+		    (struct inpcb *)iter, inp_queue);
+		in_pcbref(tmp);
+	}
+
+	mtx_leave(&table->inpt_mtx);
+
+	/* Drop the reference taken when `inp' was handed out. */
+	if (inp)
+		in_pcbunref(inp);
+
+	return tmp;
+}
+
void
in_setsockaddr(struct inpcb *inp, struct mbuf *nam)
{
@@ -743,6 +776,8 @@ in_pcbnotifyall(struct inpcbtable *table
rw_enter_write(&table->inpt_notify);
mtx_enter(&table->inpt_mtx);
TAILQ_FOREACH(inp, &table->inpt_queue, inp_queue) {
+ if (in_pcb_is_iterator(inp))
+ continue;
KASSERT(!ISSET(inp->inp_flags, INP_IPV6));
if (inp->inp_faddr.s_addr != dst->sin_addr.s_addr ||
@@ -1098,6 +1133,8 @@ in_pcbresize(struct inpcbtable *table, i
table->inpt_size = hashsize;
TAILQ_FOREACH(inp, &table->inpt_queue, inp_queue) {
+ if (in_pcb_is_iterator(inp))
+ continue;
LIST_REMOVE(inp, inp_lhash);
LIST_REMOVE(inp, inp_hash);
in_pcbhash_insert(inp);
Index: sys/netinet/in_pcb.h
===================================================================
RCS file: /cvs/src/sys/netinet/in_pcb.h,v
diff -u -p -r1.158 in_pcb.h
--- sys/netinet/in_pcb.h 12 Jul 2024 19:50:35 -0000 1.158
+++ sys/netinet/in_pcb.h 25 Oct 2024 20:32:47 -0000
@@ -178,6 +178,20 @@ struct inpcb {
LIST_HEAD(inpcbhead, inpcb);
+struct inpcb_iterator {
+ LIST_ENTRY(inpcb) inp_hash; /* unused; NOTE(review): presumably mirrors struct inpcb layout, confirm */
+ LIST_ENTRY(inpcb) inp_lhash; /* unused */
+ TAILQ_ENTRY(inpcb) inp_queue; /* [t] links the iterator into the inet PCB queue */
+ SIMPLEQ_ENTRY(inpcb) inp_notify; /* unused */
+ struct inpcbtable *inp_table; /* [I] always NULL, marks the entry as an iterator */
+};
+
+/*
+ * Tell whether a queue entry is an iterator marker rather than a PCB.
+ * Returns 1 when inp_table is NULL, 0 otherwise.
+ */
+static inline int
+in_pcb_is_iterator(struct inpcb *inp)
+{
+	if (inp->inp_table != NULL)
+		return 0;
+	return 1;
+}
+
struct inpcbtable {
struct mutex inpt_mtx; /* protect queue and hash */
struct rwlock inpt_notify; /* protect inp_notify list */
@@ -302,6 +316,9 @@ struct inpcb *
in_pcbref(struct inpcb *);
void in_pcbunref(struct inpcb *);
void in_pcbdisconnect(struct inpcb *);
+struct inpcb *
+ in_pcb_iterator(struct inpcbtable *, struct inpcb *,
+ struct inpcb_iterator *);
struct inpcb *
in_pcblookup(struct inpcbtable *, struct in_addr,
u_int, struct in_addr, u_int, u_int);
Index: sys/netinet/ip_divert.c
===================================================================
RCS file: /cvs/src/sys/netinet/ip_divert.c,v
diff -u -p -r1.97 ip_divert.c
--- sys/netinet/ip_divert.c 16 Aug 2024 09:20:35 -0000 1.97
+++ sys/netinet/ip_divert.c 25 Oct 2024 20:32:47 -0000
@@ -203,6 +203,8 @@ divert_packet(struct mbuf *m, int dir, u
mtx_enter(&divbtable.inpt_mtx);
TAILQ_FOREACH(inp, &divbtable.inpt_queue, inp_queue) {
+ if (in_pcb_is_iterator(inp))
+ continue;
if (inp->inp_lport != divert_port)
continue;
in_pcbref(inp);
Index: sys/netinet/raw_ip.c
===================================================================
RCS file: /cvs/src/sys/netinet/raw_ip.c,v
diff -u -p -r1.160 raw_ip.c
--- sys/netinet/raw_ip.c 12 Jul 2024 19:50:35 -0000 1.160
+++ sys/netinet/raw_ip.c 25 Oct 2024 20:32:47 -0000
@@ -167,6 +167,8 @@ rip_input(struct mbuf **mp, int *offp, i
rw_enter_write(&rawcbtable.inpt_notify);
mtx_enter(&rawcbtable.inpt_mtx);
TAILQ_FOREACH(inp, &rawcbtable.inpt_queue, inp_queue) {
+ if (in_pcb_is_iterator(inp))
+ continue;
KASSERT(!ISSET(inp->inp_flags, INP_IPV6));
/*
Index: sys/netinet/udp_usrreq.c
===================================================================
RCS file: /cvs/src/sys/netinet/udp_usrreq.c,v
diff -u -p -r1.324 udp_usrreq.c
--- sys/netinet/udp_usrreq.c 6 Aug 2024 20:15:53 -0000 1.324
+++ sys/netinet/udp_usrreq.c 25 Oct 2024 20:32:47 -0000
@@ -381,7 +381,8 @@ udp_input(struct mbuf **mp, int *offp, i
}
if (m->m_flags & (M_BCAST|M_MCAST)) {
- SIMPLEQ_HEAD(, inpcb) inpcblist;
+ struct inpcb_iterator iter = {.inp_table = NULL};
+ struct inpcb *tinp = NULL;
struct inpcbtable *table;
/*
@@ -400,11 +401,6 @@ udp_input(struct mbuf **mp, int *offp, i
* fixing the interface. Maybe 4.5BSD will remedy this?)
*/
- /*
- * Locate pcb(s) for datagram.
- * (Algorithm copied from raw_intr().)
- */
- SIMPLEQ_INIT(&inpcblist);
#ifdef INET6
if (ip6)
table = &udb6table;
@@ -412,9 +408,8 @@ udp_input(struct mbuf **mp, int *offp, i
#endif
table = &udbtable;
- rw_enter_write(&table->inpt_notify);
- mtx_enter(&table->inpt_mtx);
- TAILQ_FOREACH(inp, &table->inpt_queue, inp_queue) {
+ inp = NULL;
+ while ((inp = in_pcb_iterator(table, inp, &iter)) != NULL){
if (ip6)
KASSERT(ISSET(inp->inp_flags, INP_IPV6));
else
@@ -465,8 +460,18 @@ udp_input(struct mbuf **mp, int *offp, i
continue;
}
- in_pcbref(inp);
- SIMPLEQ_INSERT_TAIL(&inpcblist, inp, inp_notify);
+ if (tinp != NULL) {
+ struct mbuf *n;
+
+ n = m_copym(m, 0, M_COPYALL, M_NOWAIT);
+ if (n != NULL) {
+ udp_sbappend(tinp, n, ip, ip6, iphlen,
+ uh, &srcsa.sa, 0);
+ }
+ in_pcbunref(tinp);
+ }
+
+ tinp = in_pcbref(inp);
/*
* Don't look for additional matches if this one does
@@ -477,14 +482,13 @@ udp_input(struct mbuf **mp, int *offp, i
* clear these options after setting them.
*/
if ((inp->inp_socket->so_options & (SO_REUSEPORT |
- SO_REUSEADDR)) == 0)
+ SO_REUSEADDR)) == 0) {
+ in_pcbunref(inp);
break;
+ }
}
- mtx_leave(&table->inpt_mtx);
-
- if (SIMPLEQ_EMPTY(&inpcblist)) {
- rw_exit_write(&table->inpt_notify);
+ if (tinp == NULL) {
/*
* No matching pcb found; discard datagram.
* (No need to send an ICMP Port Unreachable
@@ -494,21 +498,8 @@ udp_input(struct mbuf **mp, int *offp, i
goto bad;
}
- while ((inp = SIMPLEQ_FIRST(&inpcblist)) != NULL) {
- struct mbuf *n;
-
- SIMPLEQ_REMOVE_HEAD(&inpcblist, inp_notify);
- if (SIMPLEQ_EMPTY(&inpcblist))
- n = m;
- else
- n = m_copym(m, 0, M_COPYALL, M_NOWAIT);
- if (n != NULL) {
- udp_sbappend(inp, n, ip, ip6, iphlen, uh,
- &srcsa.sa, 0);
- }
- in_pcbunref(inp);
- }
- rw_exit_write(&table->inpt_notify);
+ udp_sbappend(tinp, m, ip, ip6, iphlen, uh, &srcsa.sa, 0);
+ in_pcbunref(tinp);
return IPPROTO_DONE;
}
Index: sys/netinet6/in6_pcb.c
===================================================================
RCS file: /cvs/src/sys/netinet6/in6_pcb.c,v
diff -u -p -r1.144 in6_pcb.c
--- sys/netinet6/in6_pcb.c 12 Apr 2024 16:07:09 -0000 1.144
+++ sys/netinet6/in6_pcb.c 25 Oct 2024 20:32:47 -0000
@@ -479,6 +479,8 @@ in6_pcbnotify(struct inpcbtable *table,
rw_enter_write(&table->inpt_notify);
mtx_enter(&table->inpt_mtx);
TAILQ_FOREACH(inp, &table->inpt_queue, inp_queue) {
+ if (in_pcb_is_iterator(inp))
+ continue;
KASSERT(ISSET(inp->inp_flags, INP_IPV6));
/*
Index: sys/netinet6/ip6_divert.c
===================================================================
RCS file: /cvs/src/sys/netinet6/ip6_divert.c,v
diff -u -p -r1.97 ip6_divert.c
--- sys/netinet6/ip6_divert.c 16 Aug 2024 09:20:35 -0000 1.97
+++ sys/netinet6/ip6_divert.c 25 Oct 2024 20:32:47 -0000
@@ -212,6 +212,8 @@ divert6_packet(struct mbuf *m, int dir,
mtx_enter(&divb6table.inpt_mtx);
TAILQ_FOREACH(inp, &divb6table.inpt_queue, inp_queue) {
+ if (in_pcb_is_iterator(inp))
+ continue;
if (inp->inp_lport != divert_port)
continue;
in_pcbref(inp);
Index: sys/netinet6/raw_ip6.c
===================================================================
RCS file: /cvs/src/sys/netinet6/raw_ip6.c,v
diff -u -p -r1.185 raw_ip6.c
--- sys/netinet6/raw_ip6.c 12 Jul 2024 19:50:35 -0000 1.185
+++ sys/netinet6/raw_ip6.c 25 Oct 2024 20:32:47 -0000
@@ -181,6 +181,8 @@ rip6_input(struct mbuf **mp, int *offp,
rw_enter_write(&rawin6pcbtable.inpt_notify);
mtx_enter(&rawin6pcbtable.inpt_mtx);
TAILQ_FOREACH(inp, &rawin6pcbtable.inpt_queue, inp_queue) {
+ if (in_pcb_is_iterator(inp))
+ continue;
KASSERT(ISSET(inp->inp_flags, INP_IPV6));
/*