On Sat, Dec 18, 2021 at 01:07:00AM +0100, Alexander Bluhm wrote: > Hi, > > There are occasions where the walker in tdb_walk() might sleep. > Case SADB_DUMP is such a case. And mvs@ has a diff that sleeps to > read the counters. So holding the tdb_sadb_mtx() when calling > walker() is not allowed. > > Move the TDB from the TDB-Hash to a list that is protected by > netlock. Then unlock tdb_sadb_mtx and traverse the list to call > walker(). > > We need less tdb_sadb_mtx dances with that solution. If needed, > netlock protection can be replaced with another rwlock later. > > ok? >
ok mvs@ > bluhm > > Index: net/pfkeyv2.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/net/pfkeyv2.c,v > retrieving revision 1.228 > diff -u -p -r1.228 pfkeyv2.c > --- net/pfkeyv2.c 14 Dec 2021 17:50:37 -0000 1.228 > +++ net/pfkeyv2.c 17 Dec 2021 21:59:00 -0000 > @@ -1045,7 +1045,7 @@ pfkeyv2_sa_flush(struct tdb *tdb, void * > { > if (!(*((u_int8_t *) satype_vp)) || > tdb->tdb_satype == *((u_int8_t *) satype_vp)) > - tdb_delete_locked(tdb); > + tdb_delete(tdb); > return (0); > } > > Index: netinet/ip_ipsp.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_ipsp.c,v > retrieving revision 1.265 > diff -u -p -r1.265 ip_ipsp.c > --- netinet/ip_ipsp.c 14 Dec 2021 17:50:37 -0000 1.265 > +++ netinet/ip_ipsp.c 17 Dec 2021 22:02:27 -0000 > @@ -90,7 +90,6 @@ void tdb_firstuse(void *); > void tdb_soft_timeout(void *); > void tdb_soft_firstuse(void *); > int tdb_hash(u_int32_t, union sockaddr_union *, u_int8_t); > -void tdb_dodelete(struct tdb *, int locked); > > int ipsec_in_use = 0; > u_int64_t ipsec_last_added = 0; > @@ -627,30 +626,36 @@ tdb_printit(void *addr, int full, int (* > int > tdb_walk(u_int rdomain, int (*walker)(struct tdb *, void *, int), void *arg) > { > - int i, rval = 0; > - struct tdb *tdbp, *next; > + SIMPLEQ_HEAD(, tdb) tdblist; > + struct tdb *tdbp; > + int i, rval; > > /* > - * The walker may aquire the kernel lock. Grab it here to keep > - * the lock order. > + * The walker may sleep. So we cannot hold the tdb_sadb_mtx while > + * traversing the tdb_hnext list. Create a new tdb_walk list with > + * exclusive netlock protection. > */ > - KERNEL_LOCK(); > + NET_ASSERT_WLOCKED(); > + SIMPLEQ_INIT(&tdblist); > + > mtx_enter(&tdb_sadb_mtx); > for (i = 0; i <= tdb_hashmask; i++) { > - for (tdbp = tdbh[i]; rval == 0 && tdbp != NULL; tdbp = next) { > - next = tdbp->tdb_hnext; > - > + for (tdbp = tdbh[i]; tdbp != NULL; tdbp = tdbp->tdb_hnext) { > if (rdomain != tdbp->tdb_rdomain) > continue; > - > - if (i == tdb_hashmask && next == NULL) > - rval = walker(tdbp, (void *)arg, 1); > - else > - rval = walker(tdbp, (void *)arg, 0); > + tdb_ref(tdbp); > + SIMPLEQ_INSERT_TAIL(&tdblist, tdbp, tdb_walk); > } > } > mtx_leave(&tdb_sadb_mtx); > - KERNEL_UNLOCK(); > + > + rval = 0; > + while ((tdbp = SIMPLEQ_FIRST(&tdblist)) != NULL) { > + SIMPLEQ_REMOVE_HEAD(&tdblist, tdb_walk); > + if (rval == 0) > + rval = walker(tdbp, arg, SIMPLEQ_EMPTY(&tdblist)); > + tdb_unref(tdbp); > + } > > return rval; > } > @@ -764,7 +769,6 @@ tdb_rehash(void) > return (ENOMEM); > } > > - > for (i = 0; i <= old_hashmask; i++) { > for (tdbp = tdbh[i]; tdbp != NULL; tdbp = tdbnp) { > tdbnp = tdbp->tdb_hnext; > @@ -1004,19 +1008,6 @@ tdb_unref(struct tdb *tdb) > void > tdb_delete(struct tdb *tdbp) > { > - tdb_dodelete(tdbp, 0); > -} > - > -void > -tdb_delete_locked(struct tdb *tdbp) > -{ > - MUTEX_ASSERT_LOCKED(&tdb_sadb_mtx); > - tdb_dodelete(tdbp, 1); > -} > - > -void > -tdb_dodelete(struct tdb *tdbp, int locked) > -{ > NET_ASSERT_LOCKED(); > > mtx_enter(&tdbp->tdb_mtx); > @@ -1026,10 +1017,7 @@ tdb_dodelete(struct tdb *tdbp, int locke > } > tdbp->tdb_flags |= TDBF_DELETED; > mtx_leave(&tdbp->tdb_mtx); > - if (locked) > - tdb_unlink_locked(tdbp); > - else > - tdb_unlink(tdbp); > + tdb_unlink(tdbp); > > /* cleanup SPD references */ > tdb_cleanspd(tdbp); > Index: netinet/ip_ipsp.h > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_ipsp.h,v > retrieving revision 1.231 > diff -u -p -r1.231 ip_ipsp.h > --- netinet/ip_ipsp.h 14 Dec 2021 17:50:37 -0000 1.231 > +++ netinet/ip_ipsp.h 17 Dec 2021 21:59:00 -0000 > @@ -335,6 +335,7 @@ struct tdb { /* tunnel > descriptor blo > struct tdb *tdb_snext; /* [s] src/sproto table */ > struct tdb *tdb_inext; > struct tdb *tdb_onext; > + SIMPLEQ_ENTRY(tdb) tdb_walk; /* [N] temp list for tdb walker */ > > struct refcnt tdb_refcnt; > struct mutex tdb_mtx; > @@ -583,7 +584,6 @@ struct tdb *gettdbbysrcdst_dir(u_int, u_ > void puttdb(struct tdb *); > void puttdb_locked(struct tdb *); > void tdb_delete(struct tdb *); > -void tdb_delete_locked(struct tdb *); > struct tdb *tdb_alloc(u_int); > struct tdb *tdb_ref(struct tdb *); > void tdb_unref(struct tdb *); >