On Sat, Dec 18, 2021 at 01:07:00AM +0100, Alexander Bluhm wrote:
> Hi,
> 
> There are occasions where the walker in tdb_walk() might sleep.
> Case SADB_DUMP is such a case.  And mvs@ has a diff that sleeps to
> read the counters.  So holding the tdb_sadb_mtx() when calling
> walker() is not allowed.
> 
> Move the TDB from the TDB-Hash to a list that is protected by
> netlock.  Then unlock tdb_sadb_mtx and traverse the list to call
> walker().
> 
> We need less tdb_sadb_mtx dances with that solution.  If needed,
> netlock protection can be replaced with another rwlock later.
> 
> ok?
> 

ok mvs@

> bluhm
> 
> Index: net/pfkeyv2.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/net/pfkeyv2.c,v
> retrieving revision 1.228
> diff -u -p -r1.228 pfkeyv2.c
> --- net/pfkeyv2.c     14 Dec 2021 17:50:37 -0000      1.228
> +++ net/pfkeyv2.c     17 Dec 2021 21:59:00 -0000
> @@ -1045,7 +1045,7 @@ pfkeyv2_sa_flush(struct tdb *tdb, void *
>  {
>       if (!(*((u_int8_t *) satype_vp)) ||
>           tdb->tdb_satype == *((u_int8_t *) satype_vp))
> -             tdb_delete_locked(tdb);
> +             tdb_delete(tdb);
>       return (0);
>  }
>  
> Index: netinet/ip_ipsp.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_ipsp.c,v
> retrieving revision 1.265
> diff -u -p -r1.265 ip_ipsp.c
> --- netinet/ip_ipsp.c 14 Dec 2021 17:50:37 -0000      1.265
> +++ netinet/ip_ipsp.c 17 Dec 2021 22:02:27 -0000
> @@ -90,7 +90,6 @@ void                tdb_firstuse(void *);
>  void         tdb_soft_timeout(void *);
>  void         tdb_soft_firstuse(void *);
>  int          tdb_hash(u_int32_t, union sockaddr_union *, u_int8_t);
> -void         tdb_dodelete(struct tdb *, int locked);
>  
>  int ipsec_in_use = 0;
>  u_int64_t ipsec_last_added = 0;
> @@ -627,30 +626,36 @@ tdb_printit(void *addr, int full, int (*
>  int
>  tdb_walk(u_int rdomain, int (*walker)(struct tdb *, void *, int), void *arg)
>  {
> -     int i, rval = 0;
> -     struct tdb *tdbp, *next;
> +     SIMPLEQ_HEAD(, tdb) tdblist;
> +     struct tdb *tdbp;
> +     int i, rval;
>  
>       /*
> -      * The walker may aquire the kernel lock.  Grab it here to keep
> -      * the lock order.
> +      * The walker may sleep.  So we cannot hold the tdb_sadb_mtx while
> +      * traversing the tdb_hnext list.  Create a new tdb_walk list with
> +      * exclusive netlock protection.
>        */
> -     KERNEL_LOCK();
> +     NET_ASSERT_WLOCKED();
> +     SIMPLEQ_INIT(&tdblist);
> +
>       mtx_enter(&tdb_sadb_mtx);
>       for (i = 0; i <= tdb_hashmask; i++) {
> -             for (tdbp = tdbh[i]; rval == 0 && tdbp != NULL; tdbp = next) {
> -                     next = tdbp->tdb_hnext;
> -
> +             for (tdbp = tdbh[i]; tdbp != NULL; tdbp = tdbp->tdb_hnext) {
>                       if (rdomain != tdbp->tdb_rdomain)
>                               continue;
> -
> -                     if (i == tdb_hashmask && next == NULL)
> -                             rval = walker(tdbp, (void *)arg, 1);
> -                     else
> -                             rval = walker(tdbp, (void *)arg, 0);
> +                     tdb_ref(tdbp);
> +                     SIMPLEQ_INSERT_TAIL(&tdblist, tdbp, tdb_walk);
>               }
>       }
>       mtx_leave(&tdb_sadb_mtx);
> -     KERNEL_UNLOCK();
> +
> +     rval = 0;
> +     while ((tdbp = SIMPLEQ_FIRST(&tdblist)) != NULL) {
> +             SIMPLEQ_REMOVE_HEAD(&tdblist, tdb_walk);
> +             if (rval == 0)
> +                     rval = walker(tdbp, arg, SIMPLEQ_EMPTY(&tdblist));
> +             tdb_unref(tdbp);
> +     }
>  
>       return rval;
>  }
> @@ -764,7 +769,6 @@ tdb_rehash(void)
>               return (ENOMEM);
>       }
>  
> -
>       for (i = 0; i <= old_hashmask; i++) {
>               for (tdbp = tdbh[i]; tdbp != NULL; tdbp = tdbnp) {
>                       tdbnp = tdbp->tdb_hnext;
> @@ -1004,19 +1008,6 @@ tdb_unref(struct tdb *tdb)
>  void
>  tdb_delete(struct tdb *tdbp)
>  {
> -     tdb_dodelete(tdbp, 0);
> -}
> -
> -void
> -tdb_delete_locked(struct tdb *tdbp)
> -{
> -     MUTEX_ASSERT_LOCKED(&tdb_sadb_mtx);
> -     tdb_dodelete(tdbp, 1);
> -}
> -
> -void
> -tdb_dodelete(struct tdb *tdbp, int locked)
> -{
>       NET_ASSERT_LOCKED();
>  
>       mtx_enter(&tdbp->tdb_mtx);
> @@ -1026,10 +1017,7 @@ tdb_dodelete(struct tdb *tdbp, int locke
>       }
>       tdbp->tdb_flags |= TDBF_DELETED;
>       mtx_leave(&tdbp->tdb_mtx);
> -     if (locked)
> -             tdb_unlink_locked(tdbp);
> -     else
> -             tdb_unlink(tdbp);
> +     tdb_unlink(tdbp);
>  
>       /* cleanup SPD references */
>       tdb_cleanspd(tdbp);
> Index: netinet/ip_ipsp.h
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_ipsp.h,v
> retrieving revision 1.231
> diff -u -p -r1.231 ip_ipsp.h
> --- netinet/ip_ipsp.h 14 Dec 2021 17:50:37 -0000      1.231
> +++ netinet/ip_ipsp.h 17 Dec 2021 21:59:00 -0000
> @@ -335,6 +335,7 @@ struct tdb {                              /* tunnel 
> descriptor blo
>       struct tdb      *tdb_snext;     /* [s] src/sproto table */
>       struct tdb      *tdb_inext;
>       struct tdb      *tdb_onext;
> +     SIMPLEQ_ENTRY(tdb) tdb_walk;    /* [N] temp list for tdb walker */
>  
>       struct refcnt   tdb_refcnt;
>       struct mutex    tdb_mtx;
> @@ -583,7 +584,6 @@ struct    tdb *gettdbbysrcdst_dir(u_int, u_
>  void puttdb(struct tdb *);
>  void puttdb_locked(struct tdb *);
>  void tdb_delete(struct tdb *);
> -void tdb_delete_locked(struct tdb *);
>  struct       tdb *tdb_alloc(u_int);
>  struct       tdb *tdb_ref(struct tdb *);
>  void tdb_unref(struct tdb *);
> 

Reply via email to