25.04.2022 19:09, Alexander Bluhm пишет:
On Mon, Apr 25, 2022 at 05:49:49PM +0300, Vitaliy Makkoveev wrote:
I would suggest to replace the timeout garbage collector with
reference counting.  This diff is only compile tested as I have no
setup for IPsec ids yet.  Should work on 7.1 an -current.

bluhm
Your diff is broken, it keeps `id_gc_list' as part of 'ipsec_ids' and
still removes `ids' from this non-existing list:
This happens when I send code that I have not tested.  Updated diff
below.

Also it modifies existing behaviour, we stop to re-use dead `ids' for
some period after it's removal. I don't know why we do this, may be some
one involved to this layer could explain.
I will ask markus@

bluhm

Index: net/pfkeyv2.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/pfkeyv2.c,v
retrieving revision 1.233
diff -u -p -r1.233 pfkeyv2.c
--- net/pfkeyv2.c       13 Mar 2022 21:38:32 -0000      1.233
+++ net/pfkeyv2.c       25 Apr 2022 15:10:54 -0000
@@ -1969,8 +1969,8 @@ pfkeyv2_send(struct socket *so, void *me

                ipo->ipo_sproto = SADB_X_GETSPROTO(smsg->sadb_msg_satype);

-               if (ipo->ipo_ids) {
-                       ipsp_ids_free(ipo->ipo_ids);
+               if (ipo->ipo_ids != NULL) {
+                       ipsp_ids_unref(ipo->ipo_ids);
                        ipo->ipo_ids = NULL;
                }

@@ -2015,8 +2015,8 @@ pfkeyv2_send(struct socket *so, void *me
                                        ipo->ipo_tdb = NULL;
                                }
                                mtx_leave(&ipo_tdb_mtx);
-                               if (ipo->ipo_ids)
-                                       ipsp_ids_free(ipo->ipo_ids);
+                               if (ipo->ipo_ids != NULL)
+                                       ipsp_ids_unref(ipo->ipo_ids);
                                pool_put(&ipsec_policy_pool, ipo);
                                NET_UNLOCK();
                                goto ret;
Index: net/pfkeyv2_convert.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/pfkeyv2_convert.c,v
retrieving revision 1.79
diff -u -p -r1.79 pfkeyv2_convert.c
--- net/pfkeyv2_convert.c       20 Jan 2022 17:13:12 -0000      1.79
+++ net/pfkeyv2_convert.c       25 Apr 2022 15:10:54 -0000
@@ -752,6 +752,7 @@ import_identities(struct ipsec_ids **ids
        import_identity(&tmp->id_local, swapped ? dstid: srcid, &id_local_sz);
        import_identity(&tmp->id_remote, swapped ? srcid: dstid, &id_remote_sz);
        if (tmp->id_local != NULL && tmp->id_remote != NULL) {
+               /* ipsp_ids_insert increments refcount, ref stored in *ids */
                *ids = ipsp_ids_insert(tmp);
                if (*ids == tmp)
                        return;
Index: netinet/ip_ipsp.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_ipsp.c,v
retrieving revision 1.269
diff -u -p -r1.269 ip_ipsp.c
--- netinet/ip_ipsp.c   10 Mar 2022 15:21:08 -0000      1.269
+++ netinet/ip_ipsp.c   25 Apr 2022 15:11:40 -0000
@@ -113,13 +113,6 @@ struct ipsec_ids_flows ipsec_ids_flows;
  struct ipsec_policy_head ipsec_policy_head =
      TAILQ_HEAD_INITIALIZER(ipsec_policy_head);

-void ipsp_ids_gc(void *);
-
-LIST_HEAD(, ipsec_ids) ipsp_ids_gc_list =
-    LIST_HEAD_INITIALIZER(ipsp_ids_gc_list);   /* [F] */
-struct timeout ipsp_ids_gc_timeout =
-    TIMEOUT_INITIALIZER_FLAGS(ipsp_ids_gc, NULL, TIMEOUT_PROC);
-
  static inline int ipsp_ids_cmp(const struct ipsec_ids *,
      const struct ipsec_ids *);
  static inline int ipsp_ids_flow_cmp(const struct ipsec_ids *,
@@ -1088,16 +1081,12 @@ tdb_free(struct tdb *tdbp)

        KASSERT(TAILQ_EMPTY(&tdbp->tdb_policy_head));

-       if (tdbp->tdb_ids) {
-               ipsp_ids_free(tdbp->tdb_ids);
-               tdbp->tdb_ids = NULL;
-       }
+       if (tdbp->tdb_ids != NULL)
+               ipsp_ids_unref(tdbp->tdb_ids);

  #if NPF > 0
-       if (tdbp->tdb_tag) {
+       if (tdbp->tdb_tag)
                pf_tag_unref(tdbp->tdb_tag);
-               tdbp->tdb_tag = 0;
-       }
  #endif

        counters_free(tdbp->tdb_counters, tdb_ncounters);
@@ -1204,19 +1193,13 @@ ipsp_ids_insert(struct ipsec_ids *ids)

        found = RBT_INSERT(ipsec_ids_tree, &ipsec_ids_tree, ids);
        if (found) {
-               /* if refcount was zero, then timeout is running */
-               if (atomic_inc_int_nv(&found->id_refcount) == 1) {
-                       LIST_REMOVE(found, id_gc_list);
-
-                       if (LIST_EMPTY(&ipsp_ids_gc_list))
-                               timeout_del(&ipsp_ids_gc_timeout);
-               }
+               refcnt_take(&found->id_refcnt);
                mtx_leave (&ipsec_flows_mtx);
                DPRINTF("ids %p count %d", found, found->id_refcount);
                return found;
        }

-       ids->id_refcount = 1;
+       refcnt_init(&ids->id_refcnt);
        ids->id_flow = start_flow = ipsec_ids_next_flow;

        if (++ipsec_ids_next_flow == 0)
@@ -1233,7 +1216,11 @@ ipsp_ids_insert(struct ipsec_ids *ids)
                        return NULL;
                }
        }
+       /* one ref count for the RB trees and one for the return value */
+       refcnt_take(&ids->id_refcnt);
+
        mtx_leave(&ipsec_flows_mtx);
+
        DPRINTF("new ids %p flow %u", ids, ids->id_flow);
        return ids;
  }
@@ -1248,71 +1235,26 @@ ipsp_ids_lookup(u_int32_t ipsecflowinfo)

        mtx_enter(&ipsec_flows_mtx);
        ids = RBT_FIND(ipsec_ids_flows, &ipsec_ids_flows, &key);
-       atomic_inc_int(&ids->id_refcount);
+       refcnt_take(&ids->id_refcnt);
        mtx_leave(&ipsec_flows_mtx);

        return ids;
  }

-/* free ids only from delayed timeout */
  void
-ipsp_ids_gc(void *arg)
+ipsp_ids_unref(struct ipsec_ids *ids)
  {
-       struct ipsec_ids *ids, *tids;
-
-       mtx_enter(&ipsec_flows_mtx);
-
-       LIST_FOREACH_SAFE(ids, &ipsp_ids_gc_list, id_gc_list, tids) {
-               KASSERT(ids->id_refcount == 0);
-               DPRINTF("ids %p count %d", ids, ids->id_refcount);
-
-               if ((--ids->id_gc_ttl) > 0)
-                       continue;
-
-               LIST_REMOVE(ids, id_gc_list);
-               RBT_REMOVE(ipsec_ids_tree, &ipsec_ids_tree, ids);
-               RBT_REMOVE(ipsec_ids_flows, &ipsec_ids_flows, ids);
-               free(ids->id_local, M_CREDENTIALS, 0);
-               free(ids->id_remote, M_CREDENTIALS, 0);
-               free(ids, M_CREDENTIALS, 0);
-       }
-
-       if (!LIST_EMPTY(&ipsp_ids_gc_list))
-               timeout_add_sec(&ipsp_ids_gc_timeout, 1);
-
-       mtx_leave(&ipsec_flows_mtx);
-}
-
-/* decrements refcount, actual free happens in gc */
-void
-ipsp_ids_free(struct ipsec_ids *ids)
-{
-       if (ids == NULL)
-               return;
-
-       /*
-        * If the refcount becomes zero, then a timeout is started. This
-        * timeout must be cancelled if refcount is increased from zero.
-        */
-       DPRINTF("ids %p count %d", ids, ids->id_refcount);
-       KASSERT(ids->id_refcount > 0);
-
-       if (atomic_dec_int_nv(&ids->id_refcount) > 0)
+       if (refcnt_rele(&ids->id_refcnt) == 0)
                return;

        mtx_enter(&ipsec_flows_mtx);
-
-       /*
-        * Add second for the case ipsp_ids_gc() is already running and
-        * awaits netlock to be released.
-        */
-       ids->id_gc_ttl = ipsec_ids_idle + 1;
-
-       if (LIST_EMPTY(&ipsp_ids_gc_list))
-               timeout_add_sec(&ipsp_ids_gc_timeout, 1);
-       LIST_INSERT_HEAD(&ipsp_ids_gc_list, ids, id_gc_list);
-
+       RBT_REMOVE(ipsec_ids_tree, &ipsec_ids_tree, ids);
+       RBT_REMOVE(ipsec_ids_flows, &ipsec_ids_flows, ids);
        mtx_leave(&ipsec_flows_mtx);
+
+       free(ids->id_local, M_CREDENTIALS, 0);
+       free(ids->id_remote, M_CREDENTIALS, 0);
+       free(ids, M_CREDENTIALS, sizeof(*ids));
  }

  static int
Index: netinet/ip_ipsp.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_ipsp.h,v
retrieving revision 1.238
diff -u -p -r1.238 ip_ipsp.h
--- netinet/ip_ipsp.h   21 Apr 2022 15:22:50 -0000      1.238
+++ netinet/ip_ipsp.h   25 Apr 2022 15:11:52 -0000
@@ -235,14 +235,12 @@ struct ipsec_id {
  };

  struct ipsec_ids {
-       LIST_ENTRY(ipsec_ids)   id_gc_list;     /* [F] */
        RBT_ENTRY(ipsec_ids)    id_node_id;     /* [F] */
        RBT_ENTRY(ipsec_ids)    id_node_flow;   /* [F] */
+       struct refcnt           id_refcnt;
        struct ipsec_id         *id_local;      /* [I] */
        struct ipsec_id         *id_remote;     /* [I] */
        u_int32_t               id_flow;        /* [I] */
-       u_int                   id_refcount;    /* [a] */
-       u_int                   id_gc_ttl;      /* [F] */
  };
  RBT_HEAD(ipsec_ids_flows, ipsec_ids);
  RBT_HEAD(ipsec_ids_tree, ipsec_ids);
@@ -674,7 +672,7 @@ int ipsp_aux_match(struct tdb *, struct
  int   ipsp_ids_match(struct ipsec_ids *, struct ipsec_ids *);
  struct ipsec_ids *ipsp_ids_insert(struct ipsec_ids *);
  struct ipsec_ids *ipsp_ids_lookup(u_int32_t);
-void   ipsp_ids_free(struct ipsec_ids *);
+void   ipsp_ids_unref(struct ipsec_ids *);

  void  ipsp_init(void);
  void  ipsec_init(void);
Index: netinet/ip_output.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_output.c,v
retrieving revision 1.380
diff -u -p -r1.380 ip_output.c
--- netinet/ip_output.c 4 Jan 2022 06:32:39 -0000       1.380
+++ netinet/ip_output.c 25 Apr 2022 15:10:54 -0000
@@ -549,7 +549,8 @@ ip_output_ipsec_lookup(struct mbuf *m, i
                ids = ipsp_ids_lookup(ipsecflowinfo);
        error = ipsp_spd_lookup(m, AF_INET, hlen, IPSP_DIRECTION_OUT,
            NULL, inp, &tdb, ids);
-       ipsp_ids_free(ids);
+       if (ids != NULL)
+               ipsp_ids_unref(ids);
        if (error || tdb == NULL) {
                *tdbout = NULL;
                return error;
Index: netinet/ip_spd.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_spd.c,v
retrieving revision 1.115
diff -u -p -r1.115 ip_spd.c
--- netinet/ip_spd.c    13 Mar 2022 21:38:32 -0000      1.115
+++ netinet/ip_spd.c    25 Apr 2022 15:10:54 -0000
@@ -538,7 +538,7 @@ ipsp_spd_lookup(struct mbuf *m, int af,
                                goto nomatchin;

                        /* Match source/dest IDs. */
-                       if (ipo->ipo_ids)
+                       if (ipo->ipo_ids != NULL)
                                if (tdbp->tdb_ids == NULL ||
                                    !ipsp_ids_match(ipo->ipo_ids, 
tdbp->tdb_ids))
                                        goto nomatchin;
@@ -696,8 +696,8 @@ ipsec_delete_policy(struct ipsec_policy

        TAILQ_REMOVE(&ipsec_policy_head, ipo, ipo_list);

-       if (ipo->ipo_ids)
-               ipsp_ids_free(ipo->ipo_ids);
+       if (ipo->ipo_ids != NULL)
+               ipsp_ids_unref(ipo->ipo_ids);

        ipsec_in_use--;

I have some additions to this bug.

To workaround this issue, i disabled ipsec+l2tp and left only openbsd-openbsd tunnels, and for three days is does not crash. With ipsec+l2tp enabled, it crashed 7 times per 2 days.

I believe, now it's my duty to test your patch? Honestly, I'm not familiar with compiling openbsd kernel and applying patches, but i will do my best to test it.

Reply via email to