Hi,
There are occasions where the walker in tdb_walk() might sleep.
SADB_DUMP is one such case, and mvs@ has a diff that sleeps to
read the counters. So holding tdb_sadb_mtx while calling
walker() is not allowed.
Take a reference to each matching TDB in the TDB hash and put it on
a temporary list that is protected by the netlock. Then unlock
tdb_sadb_mtx and traverse the list to call walker().
We need fewer tdb_sadb_mtx dances with that solution. If needed, the
netlock protection can be replaced with another rwlock later.
ok?
bluhm
Index: net/pfkeyv2.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/pfkeyv2.c,v
retrieving revision 1.228
diff -u -p -r1.228 pfkeyv2.c
--- net/pfkeyv2.c 14 Dec 2021 17:50:37 -0000 1.228
+++ net/pfkeyv2.c 17 Dec 2021 21:59:00 -0000
@@ -1045,7 +1045,7 @@ pfkeyv2_sa_flush(struct tdb *tdb, void *
{
if (!(*((u_int8_t *) satype_vp)) ||
tdb->tdb_satype == *((u_int8_t *) satype_vp))
- tdb_delete_locked(tdb);
+ tdb_delete(tdb);
return (0);
}
Index: netinet/ip_ipsp.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_ipsp.c,v
retrieving revision 1.265
diff -u -p -r1.265 ip_ipsp.c
--- netinet/ip_ipsp.c 14 Dec 2021 17:50:37 -0000 1.265
+++ netinet/ip_ipsp.c 17 Dec 2021 22:02:27 -0000
@@ -90,7 +90,6 @@ void tdb_firstuse(void *);
void tdb_soft_timeout(void *);
void tdb_soft_firstuse(void *);
int tdb_hash(u_int32_t, union sockaddr_union *, u_int8_t);
-void tdb_dodelete(struct tdb *, int locked);
int ipsec_in_use = 0;
u_int64_t ipsec_last_added = 0;
@@ -627,30 +626,36 @@ tdb_printit(void *addr, int full, int (*
int
tdb_walk(u_int rdomain, int (*walker)(struct tdb *, void *, int), void *arg)
{
- int i, rval = 0;
- struct tdb *tdbp, *next;
+ SIMPLEQ_HEAD(, tdb) tdblist;
+ struct tdb *tdbp;
+ int i, rval;
/*
- * The walker may aquire the kernel lock. Grab it here to keep
- * the lock order.
+ * The walker may sleep. So we cannot hold the tdb_sadb_mtx while
+ * traversing the tdb_hnext list. Create a new tdb_walk list with
+ * exclusive netlock protection.
*/
- KERNEL_LOCK();
+ NET_ASSERT_WLOCKED();
+ SIMPLEQ_INIT(&tdblist);
+
mtx_enter(&tdb_sadb_mtx);
for (i = 0; i <= tdb_hashmask; i++) {
- for (tdbp = tdbh[i]; rval == 0 && tdbp != NULL; tdbp = next) {
- next = tdbp->tdb_hnext;
-
+ for (tdbp = tdbh[i]; tdbp != NULL; tdbp = tdbp->tdb_hnext) {
if (rdomain != tdbp->tdb_rdomain)
continue;
-
- if (i == tdb_hashmask && next == NULL)
- rval = walker(tdbp, (void *)arg, 1);
- else
- rval = walker(tdbp, (void *)arg, 0);
+ tdb_ref(tdbp);
+ SIMPLEQ_INSERT_TAIL(&tdblist, tdbp, tdb_walk);
}
}
mtx_leave(&tdb_sadb_mtx);
- KERNEL_UNLOCK();
+
+ rval = 0;
+ while ((tdbp = SIMPLEQ_FIRST(&tdblist)) != NULL) {
+ SIMPLEQ_REMOVE_HEAD(&tdblist, tdb_walk);
+ if (rval == 0)
+ rval = walker(tdbp, arg, SIMPLEQ_EMPTY(&tdblist));
+ tdb_unref(tdbp);
+ }
return rval;
}
@@ -764,7 +769,6 @@ tdb_rehash(void)
return (ENOMEM);
}
-
for (i = 0; i <= old_hashmask; i++) {
for (tdbp = tdbh[i]; tdbp != NULL; tdbp = tdbnp) {
tdbnp = tdbp->tdb_hnext;
@@ -1004,19 +1008,6 @@ tdb_unref(struct tdb *tdb)
void
tdb_delete(struct tdb *tdbp)
{
- tdb_dodelete(tdbp, 0);
-}
-
-void
-tdb_delete_locked(struct tdb *tdbp)
-{
- MUTEX_ASSERT_LOCKED(&tdb_sadb_mtx);
- tdb_dodelete(tdbp, 1);
-}
-
-void
-tdb_dodelete(struct tdb *tdbp, int locked)
-{
NET_ASSERT_LOCKED();
mtx_enter(&tdbp->tdb_mtx);
@@ -1026,10 +1017,7 @@ tdb_dodelete(struct tdb *tdbp, int locke
}
tdbp->tdb_flags |= TDBF_DELETED;
mtx_leave(&tdbp->tdb_mtx);
- if (locked)
- tdb_unlink_locked(tdbp);
- else
- tdb_unlink(tdbp);
+ tdb_unlink(tdbp);
/* cleanup SPD references */
tdb_cleanspd(tdbp);
Index: netinet/ip_ipsp.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_ipsp.h,v
retrieving revision 1.231
diff -u -p -r1.231 ip_ipsp.h
--- netinet/ip_ipsp.h 14 Dec 2021 17:50:37 -0000 1.231
+++ netinet/ip_ipsp.h 17 Dec 2021 21:59:00 -0000
@@ -335,6 +335,7 @@ struct tdb { /* tunnel descriptor blo
struct tdb *tdb_snext; /* [s] src/sproto table */
struct tdb *tdb_inext;
struct tdb *tdb_onext;
+ SIMPLEQ_ENTRY(tdb) tdb_walk; /* [N] temp list for tdb walker */
struct refcnt tdb_refcnt;
struct mutex tdb_mtx;
@@ -583,7 +584,6 @@ struct tdb *gettdbbysrcdst_dir(u_int, u_
void puttdb(struct tdb *);
void puttdb_locked(struct tdb *);
void tdb_delete(struct tdb *);
-void tdb_delete_locked(struct tdb *);
struct tdb *tdb_alloc(u_int);
struct tdb *tdb_ref(struct tdb *);
void tdb_unref(struct tdb *);