Hi,

I would like to introduce a neighbor discovery mutex, like ARP already
uses one.  Note that this does not unlock nd6 from the kernel lock yet;
that will be done in follow-up diffs.

Lockless lookup in the hot path will be harder for ND6 than for ARP,
as ln_state is always checked.  Anyway, let's get closer to that goal
in small steps.
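
For illustration, this is the access pattern the mutex is meant to
enforce for readers running with only the shared net lock.  The helper
below is a made-up sketch, not part of the diff; nd6_mtx, rt_llinfo and
ln_state are the names used in the patch:

/*
 * Sketch only, not in the diff: dereference rt_llinfo under nd6_mtx
 * so the llinfo_nd6 cannot be freed while it is inspected.
 * Returns -1 if the route carries no llinfo.
 */
int
nd6_llinfo_state(struct rtentry *rt)	/* hypothetical helper */
{
	struct llinfo_nd6 *ln;
	int state = -1;

	mtx_enter(&nd6_mtx);
	ln = (struct llinfo_nd6 *)rt->rt_llinfo;
	if (ln != NULL)
		state = ln->ln_state;
	mtx_leave(&nd6_mtx);

	return state;
}

RTM_DELETE clears rt_llinfo under the same mutex, so a NULL check after
taking it is all a reader needs.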

ok?

bluhm

Index: netinet6/nd6.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/nd6.c,v
retrieving revision 1.273
diff -u -p -r1.273 nd6.c
--- netinet6/nd6.c      2 May 2023 06:06:13 -0000       1.273
+++ netinet6/nd6.c      2 May 2023 14:41:14 -0000
@@ -62,6 +62,15 @@
 #include <netinet6/nd6.h>
 #include <netinet/icmp6.h>
 
+/*
+ * Locks used to protect struct members in this file:
+ *     a       atomic operations
+ *     I       immutable after creation
+ *     K       kernel lock
+ *     m       nd6 mutex, needed when net lock is shared
+ *     N       net lock
+ */
+
 #define ND6_SLOWTIMER_INTERVAL (60 * 60) /* 1 hour */
 #define ND6_RECALC_REACHTM_INTERVAL (60 * 120) /* 2 hours */
 
@@ -84,9 +93,13 @@ int nd6_debug = 1;
 int nd6_debug = 0;
 #endif
 
-TAILQ_HEAD(llinfo_nd6_head, llinfo_nd6) nd6_list;
-struct pool nd6_pool;          /* pool for llinfo_nd6 structures */
-int    nd6_inuse;
+/* llinfo_nd6 lifetime, rt_llinfo and RTF_LLINFO are protected by nd6_mtx */
+struct mutex nd6_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);
+
+TAILQ_HEAD(llinfo_nd6_head, llinfo_nd6) nd6_list =
+    TAILQ_HEAD_INITIALIZER(nd6_list);  /* [mN] list of llinfo_nd6 structures */
+struct pool nd6_pool;          /* [I] pool for llinfo_nd6 structures */
+int    nd6_inuse;              /* [m] limit neighbor discovery routes */
 unsigned int   ln_hold_total;  /* [a] packets currently in the nd6 queue */
 
 void nd6_timer(void *);
@@ -105,7 +118,6 @@ struct task nd6_expire_task;
 void
 nd6_init(void)
 {
-       TAILQ_INIT(&nd6_list);
        pool_init(&nd6_pool, sizeof(struct llinfo_nd6), 0,
            IPL_SOFTNET, 0, "nd6", NULL);
 
@@ -259,6 +271,7 @@ nd6_timer(void *unused)
        uptime = getuptime();
        expire = uptime + nd6_gctimer;
 
+       /* Net lock is exclusive, no nd6 mutex needed for nd6_list here. */
        TAILQ_FOREACH_SAFE(ln, &nd6_list, ln_list, nln) {
                struct rtentry *rt = ln->ln_rt;
 
@@ -465,7 +478,7 @@ nd6_purge(struct ifnet *ifp)
 {
        struct llinfo_nd6 *ln, *nln;
 
-       NET_ASSERT_LOCKED();
+       NET_ASSERT_LOCKED_EXCLUSIVE();
 
        /*
         * Nuke neighbor cache entries for the ifp.
@@ -627,13 +640,20 @@ nd6_is_addr_neighbor(const struct sockad
 void
 nd6_invalidate(struct rtentry *rt)
 {
-       struct llinfo_nd6 *ln = (struct llinfo_nd6 *)rt->rt_llinfo;
+       struct llinfo_nd6 *ln;
        struct sockaddr_dl *sdl = satosdl(rt->rt_gateway);
 
+       mtx_enter(&nd6_mtx);
+       ln = (struct llinfo_nd6 *)rt->rt_llinfo;
+       if (ln == NULL) {
+               mtx_leave(&nd6_mtx);
+               return;
+       }
        atomic_sub_int(&ln_hold_total, mq_purge(&ln->ln_mq));
        sdl->sdl_alen = 0;
        ln->ln_state = ND6_LLINFO_INCOMPLETE;
        ln->ln_asked = 0;
+       mtx_leave(&nd6_mtx);
 }
 
 /*
@@ -804,20 +824,26 @@ nd6_rtrequest(struct ifnet *ifp, int req
                }
                satosdl(gate)->sdl_type = ifp->if_type;
                satosdl(gate)->sdl_index = ifp->if_index;
-               if (ln != NULL)
-                       break;  /* This happens on a route change */
                /*
                 * Case 2: This route may come from cloning, or a manual route
                 * add with a LL address.
                 */
                ln = pool_get(&nd6_pool, PR_NOWAIT | PR_ZERO);
-               rt->rt_llinfo = (caddr_t)ln;
                if (ln == NULL) {
                        log(LOG_DEBUG, "%s: pool get failed\n", __func__);
                        break;
                }
-               mq_init(&ln->ln_mq, LN_HOLD_QUEUE, IPL_SOFTNET);
+
+               mtx_enter(&nd6_mtx);
+               if (rt->rt_llinfo != NULL) {
+                       /* we lost the race, another thread has entered it */
+                       mtx_leave(&nd6_mtx);
+                       pool_put(&nd6_pool, ln);
+                       break;
+               }
                nd6_inuse++;
+               mq_init(&ln->ln_mq, LN_HOLD_QUEUE, IPL_SOFTNET);
+               rt->rt_llinfo = (caddr_t)ln;
                ln->ln_rt = rt;
                /* this is required for "ndp" command. - shin */
                if (req == RTM_ADD) {
@@ -838,6 +864,7 @@ nd6_rtrequest(struct ifnet *ifp, int req
                }
                rt->rt_flags |= RTF_LLINFO;
                TAILQ_INSERT_HEAD(&nd6_list, ln, ln_list);
+               mtx_leave(&nd6_mtx);
 
                /*
                 * If we have too many cache entries, initiate immediate
@@ -850,6 +877,7 @@ nd6_rtrequest(struct ifnet *ifp, int req
                    nd6_inuse >= ip6_neighborgcthresh) {
                        int i;
 
+                       mtx_enter(&nd6_mtx);
                        for (i = 0; i < 10; i++) {
                                struct llinfo_nd6 *ln_end;
 
@@ -870,6 +898,7 @@ nd6_rtrequest(struct ifnet *ifp, int req
                                        ln_end->ln_state = ND6_LLINFO_PURGE;
                                nd6_llinfo_settimer(ln_end, 0);
                        }
+                       mtx_leave(&nd6_mtx);
                }
 
                /*
@@ -901,6 +930,7 @@ nd6_rtrequest(struct ifnet *ifp, int req
                                llsol.s6_addr32[2] = htonl(1);
                                llsol.s6_addr8[12] = 0xff;
 
+                               KERNEL_LOCK();
                                if (in6_addmulti(&llsol, ifp, &error)) {
                                        char addr[INET6_ADDRSTRLEN];
                                        nd6log((LOG_ERR, "%s: failed to join "
@@ -909,13 +939,29 @@ nd6_rtrequest(struct ifnet *ifp, int req
                                                addr, sizeof(addr)),
                                            error));
                                }
+                               KERNEL_UNLOCK();
                        }
                }
                break;
 
        case RTM_DELETE:
-               if (ln == NULL)
+               mtx_enter(&nd6_mtx);
+               ln = (struct llinfo_nd6 *)rt->rt_llinfo;
+               if (ln == NULL) {
+                       /* we lost the race, another thread has removed it */
+                       mtx_leave(&nd6_mtx);
                        break;
+               }
+               nd6_inuse--;
+               TAILQ_REMOVE(&nd6_list, ln, ln_list);
+               rt->rt_expire = 0;
+               rt->rt_llinfo = NULL;
+               rt->rt_flags &= ~RTF_LLINFO;
+               atomic_sub_int(&ln_hold_total, mq_purge(&ln->ln_mq));
+               mtx_leave(&nd6_mtx);
+
+               pool_put(&nd6_pool, ln);
+
                /* leave from solicited node multicast for proxy ND */
                if ((rt->rt_flags & RTF_ANNOUNCE) != 0 &&
                    (ifp->if_flags & IFF_MULTICAST) != 0) {
@@ -929,22 +975,15 @@ nd6_rtrequest(struct ifnet *ifp, int req
                        llsol.s6_addr32[2] = htonl(1);
                        llsol.s6_addr8[12] = 0xff;
 
+                       KERNEL_LOCK();
                        IN6_LOOKUP_MULTI(llsol, ifp, in6m);
                        if (in6m)
                                in6_delmulti(in6m);
+                       KERNEL_UNLOCK();
                }
-               nd6_inuse--;
-               TAILQ_REMOVE(&nd6_list, ln, ln_list);
-               rt->rt_expire = 0;
-               rt->rt_llinfo = NULL;
-               rt->rt_flags &= ~RTF_LLINFO;
-               atomic_sub_int(&ln_hold_total, mq_purge(&ln->ln_mq));
-               pool_put(&nd6_pool, ln);
                break;
 
        case RTM_INVALIDATE:
-               if (ln == NULL)
-                       break;
                if (!ISSET(rt->rt_flags, RTF_LOCAL))
                        nd6_invalidate(rt);
                break;
@@ -1299,8 +1338,10 @@ nd6_resolve(struct ifnet *ifp, struct rt
         * for this entry to be a target of forced garbage collection (see
         * nd6_rtrequest()).
         */
+       mtx_enter(&nd6_mtx);
        TAILQ_REMOVE(&nd6_list, ln, ln_list);
        TAILQ_INSERT_HEAD(&nd6_list, ln, ln_list);
+       mtx_leave(&nd6_mtx);
 
        /*
         * The first time we send a packet to a neighbor whose entry is
Index: netinet6/nd6.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/nd6.h,v
retrieving revision 1.98
diff -u -p -r1.98 nd6.h
--- netinet6/nd6.h      2 May 2023 06:06:13 -0000       1.98
+++ netinet6/nd6.h      2 May 2023 14:41:51 -0000
@@ -44,7 +44,10 @@
 #define ND6_LLINFO_PROBE       4
 
 /*
- *  Locks used to protect struct members in this file:
+ * Locks used to protect struct members in this file:
+ *     I       immutable after creation
+ *     K       kernel lock
+ *     m       nd6 mutex, needed when net lock is shared
  *     N       net lock
  */
 
@@ -79,8 +82,8 @@ struct        in6_ndireq {
 #include <sys/queue.h>
 
 struct llinfo_nd6 {
-       TAILQ_ENTRY(llinfo_nd6) ln_list;
-       struct  rtentry *ln_rt;
+       TAILQ_ENTRY(llinfo_nd6)  ln_list;       /* [mN] global nd6_list */
+       struct  rtentry         *ln_rt;         /* [I] backpointer to rtentry */
        struct  mbuf_queue ln_mq;       /* hold packets until resolved */
        struct  in6_addr ln_saddr6;     /* source of prompting packet */
        long    ln_asked;       /* number of queries already sent for addr */
Index: netinet6/nd6_nbr.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/nd6_nbr.c,v
retrieving revision 1.147
diff -u -p -r1.147 nd6_nbr.c
--- netinet6/nd6_nbr.c  2 May 2023 06:06:13 -0000       1.147
+++ netinet6/nd6_nbr.c  2 May 2023 14:45:25 -0000
@@ -567,7 +567,7 @@ nd6_na_input(struct mbuf *m, int off, in
        struct nd_opts ndopts;
        char addr[INET6_ADDRSTRLEN], addr0[INET6_ADDRSTRLEN];
 
-       NET_ASSERT_LOCKED();
+       NET_ASSERT_LOCKED_EXCLUSIVE();
 
        ifp = if_get(m->m_pkthdr.ph_ifidx);
        if (ifp == NULL)
