Hi, I would like to introduce a neighbor discovery mutex like the one ARP uses. Note that this does not yet unlock nd6 from the kernel lock; that will be done in follow-up diffs.
Lockless lookup in the hot path for ND6 will be harder than ARP as ln_state is always checked. Anyway let's get closer to that goal in small steps. ok? bluhm Index: netinet6/nd6.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/nd6.c,v retrieving revision 1.273 diff -u -p -r1.273 nd6.c --- netinet6/nd6.c 2 May 2023 06:06:13 -0000 1.273 +++ netinet6/nd6.c 2 May 2023 14:41:14 -0000 @@ -62,6 +62,15 @@ #include <netinet6/nd6.h> #include <netinet/icmp6.h> +/* + * Locks used to protect struct members in this file: + * a atomic operations + * I immutable after creation + * K kernel lock + * m nd6 mutex, needed when net lock is shared + * N net lock + */ + #define ND6_SLOWTIMER_INTERVAL (60 * 60) /* 1 hour */ #define ND6_RECALC_REACHTM_INTERVAL (60 * 120) /* 2 hours */ @@ -84,9 +93,13 @@ int nd6_debug = 1; int nd6_debug = 0; #endif -TAILQ_HEAD(llinfo_nd6_head, llinfo_nd6) nd6_list; -struct pool nd6_pool; /* pool for llinfo_nd6 structures */ -int nd6_inuse; +/* llinfo_nd6 live time, rt_llinfo and RTF_LLINFO are protected by nd6_mtx */ +struct mutex nd6_mtx = MUTEX_INITIALIZER(IPL_SOFTNET); + +TAILQ_HEAD(llinfo_nd6_head, llinfo_nd6) nd6_list = + TAILQ_HEAD_INITIALIZER(nd6_list); /* [mN] list of llinfo_nd6 structures */ +struct pool nd6_pool; /* [I] pool for llinfo_nd6 structures */ +int nd6_inuse; /* [m] limit neigbor discovery routes */ unsigned int ln_hold_total; /* [a] packets currently in the nd6 queue */ void nd6_timer(void *); @@ -105,7 +118,6 @@ struct task nd6_expire_task; void nd6_init(void) { - TAILQ_INIT(&nd6_list); pool_init(&nd6_pool, sizeof(struct llinfo_nd6), 0, IPL_SOFTNET, 0, "nd6", NULL); @@ -259,6 +271,7 @@ nd6_timer(void *unused) uptime = getuptime(); expire = uptime + nd6_gctimer; + /* Net lock is exclusive, no nd6 mutex needed for nd6_list here. 
*/ TAILQ_FOREACH_SAFE(ln, &nd6_list, ln_list, nln) { struct rtentry *rt = ln->ln_rt; @@ -465,7 +478,7 @@ nd6_purge(struct ifnet *ifp) { struct llinfo_nd6 *ln, *nln; - NET_ASSERT_LOCKED(); + NET_ASSERT_LOCKED_EXCLUSIVE(); /* * Nuke neighbor cache entries for the ifp. @@ -627,13 +640,20 @@ nd6_is_addr_neighbor(const struct sockad void nd6_invalidate(struct rtentry *rt) { - struct llinfo_nd6 *ln = (struct llinfo_nd6 *)rt->rt_llinfo; + struct llinfo_nd6 *ln; struct sockaddr_dl *sdl = satosdl(rt->rt_gateway); + mtx_enter(&nd6_mtx); + ln = (struct llinfo_nd6 *)rt->rt_llinfo; + if (ln == NULL) { + mtx_leave(&nd6_mtx); + return; + } atomic_sub_int(&ln_hold_total, mq_purge(&ln->ln_mq)); sdl->sdl_alen = 0; ln->ln_state = ND6_LLINFO_INCOMPLETE; ln->ln_asked = 0; + mtx_leave(&nd6_mtx); } /* @@ -804,20 +824,26 @@ nd6_rtrequest(struct ifnet *ifp, int req } satosdl(gate)->sdl_type = ifp->if_type; satosdl(gate)->sdl_index = ifp->if_index; - if (ln != NULL) - break; /* This happens on a route change */ /* * Case 2: This route may come from cloning, or a manual route * add with a LL address. */ ln = pool_get(&nd6_pool, PR_NOWAIT | PR_ZERO); - rt->rt_llinfo = (caddr_t)ln; if (ln == NULL) { log(LOG_DEBUG, "%s: pool get failed\n", __func__); break; } - mq_init(&ln->ln_mq, LN_HOLD_QUEUE, IPL_SOFTNET); + + mtx_enter(&nd6_mtx); + if (rt->rt_llinfo != NULL) { + /* we lost the race, another thread has entered it */ + mtx_leave(&nd6_mtx); + pool_put(&nd6_pool, ln); + break; + } nd6_inuse++; + mq_init(&ln->ln_mq, LN_HOLD_QUEUE, IPL_SOFTNET); + rt->rt_llinfo = (caddr_t)ln; ln->ln_rt = rt; /* this is required for "ndp" command. 
- shin */ if (req == RTM_ADD) { @@ -838,6 +864,7 @@ nd6_rtrequest(struct ifnet *ifp, int req } rt->rt_flags |= RTF_LLINFO; TAILQ_INSERT_HEAD(&nd6_list, ln, ln_list); + mtx_leave(&nd6_mtx); /* * If we have too many cache entries, initiate immediate @@ -850,6 +877,7 @@ nd6_rtrequest(struct ifnet *ifp, int req nd6_inuse >= ip6_neighborgcthresh) { int i; + mtx_enter(&nd6_mtx); for (i = 0; i < 10; i++) { struct llinfo_nd6 *ln_end; @@ -870,6 +898,7 @@ nd6_rtrequest(struct ifnet *ifp, int req ln_end->ln_state = ND6_LLINFO_PURGE; nd6_llinfo_settimer(ln_end, 0); } + mtx_leave(&nd6_mtx); } /* @@ -901,6 +930,7 @@ nd6_rtrequest(struct ifnet *ifp, int req llsol.s6_addr32[2] = htonl(1); llsol.s6_addr8[12] = 0xff; + KERNEL_LOCK(); if (in6_addmulti(&llsol, ifp, &error)) { char addr[INET6_ADDRSTRLEN]; nd6log((LOG_ERR, "%s: failed to join " @@ -909,13 +939,29 @@ nd6_rtrequest(struct ifnet *ifp, int req addr, sizeof(addr)), error)); } + KERNEL_UNLOCK(); } } break; case RTM_DELETE: - if (ln == NULL) + mtx_enter(&nd6_mtx); + ln = (struct llinfo_nd6 *)rt->rt_llinfo; + if (ln == NULL) { + /* we lost the race, another thread has removed it */ + mtx_leave(&nd6_mtx); break; + } + nd6_inuse--; + TAILQ_REMOVE(&nd6_list, ln, ln_list); + rt->rt_expire = 0; + rt->rt_llinfo = NULL; + rt->rt_flags &= ~RTF_LLINFO; + atomic_sub_int(&ln_hold_total, mq_purge(&ln->ln_mq)); + mtx_leave(&nd6_mtx); + + pool_put(&nd6_pool, ln); + /* leave from solicited node multicast for proxy ND */ if ((rt->rt_flags & RTF_ANNOUNCE) != 0 && (ifp->if_flags & IFF_MULTICAST) != 0) { @@ -929,22 +975,15 @@ nd6_rtrequest(struct ifnet *ifp, int req llsol.s6_addr32[2] = htonl(1); llsol.s6_addr8[12] = 0xff; + KERNEL_LOCK(); IN6_LOOKUP_MULTI(llsol, ifp, in6m); if (in6m) in6_delmulti(in6m); + KERNEL_UNLOCK(); } - nd6_inuse--; - TAILQ_REMOVE(&nd6_list, ln, ln_list); - rt->rt_expire = 0; - rt->rt_llinfo = NULL; - rt->rt_flags &= ~RTF_LLINFO; - atomic_sub_int(&ln_hold_total, mq_purge(&ln->ln_mq)); - pool_put(&nd6_pool, ln); break; case 
RTM_INVALIDATE: - if (ln == NULL) - break; if (!ISSET(rt->rt_flags, RTF_LOCAL)) nd6_invalidate(rt); break; @@ -1299,8 +1338,10 @@ nd6_resolve(struct ifnet *ifp, struct rt * for this entry to be a target of forced garbage collection (see * nd6_rtrequest()). */ + mtx_enter(&nd6_mtx); TAILQ_REMOVE(&nd6_list, ln, ln_list); TAILQ_INSERT_HEAD(&nd6_list, ln, ln_list); + mtx_leave(&nd6_mtx); /* * The first time we send a packet to a neighbor whose entry is Index: netinet6/nd6.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/nd6.h,v retrieving revision 1.98 diff -u -p -r1.98 nd6.h --- netinet6/nd6.h 2 May 2023 06:06:13 -0000 1.98 +++ netinet6/nd6.h 2 May 2023 14:41:51 -0000 @@ -44,7 +44,10 @@ #define ND6_LLINFO_PROBE 4 /* - * Locks used to protect struct members in this file: + * Locks used to protect struct members in this file: + * I immutable after creation + * K kernel lock + * m nd6 mutex, needed when net lock is shared * N net lock */ @@ -79,8 +82,8 @@ struct in6_ndireq { #include <sys/queue.h> struct llinfo_nd6 { - TAILQ_ENTRY(llinfo_nd6) ln_list; - struct rtentry *ln_rt; + TAILQ_ENTRY(llinfo_nd6) ln_list; /* [mN] global nd6_list */ + struct rtentry *ln_rt; /* [I] backpointer to rtentry */ struct mbuf_queue ln_mq; /* hold packets until resolved */ struct in6_addr ln_saddr6; /* source of prompting packet */ long ln_asked; /* number of queries already sent for addr */ Index: netinet6/nd6_nbr.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/nd6_nbr.c,v retrieving revision 1.147 diff -u -p -r1.147 nd6_nbr.c --- netinet6/nd6_nbr.c 2 May 2023 06:06:13 -0000 1.147 +++ netinet6/nd6_nbr.c 2 May 2023 14:45:25 -0000 @@ -567,7 +567,7 @@ nd6_na_input(struct mbuf *m, int off, in struct nd_opts ndopts; char addr[INET6_ADDRSTRLEN], addr0[INET6_ADDRSTRLEN]; - NET_ASSERT_LOCKED(); + NET_ASSERT_LOCKED_EXCLUSIVE(); ifp = 
if_get(m->m_pkthdr.ph_ifidx); if (ifp == NULL)