Module Name:    src
Committed By:   ozaki-r
Date:           Wed Mar  1 09:09:37 UTC 2017

Modified Files:
        src/sys/netinet6: mld6.c

Log Message:
Make IPv6 multicast MP-safe partially

To complete the task, we need to make users of IPv6 multicast MP-safe, for
example socket/PCB and CARP.


To generate a diff of this commit:
cvs rdiff -u -r1.84 -r1.85 src/sys/netinet6/mld6.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/netinet6/mld6.c
diff -u src/sys/netinet6/mld6.c:1.84 src/sys/netinet6/mld6.c:1.85
--- src/sys/netinet6/mld6.c:1.84	Wed Mar  1 08:54:12 2017
+++ src/sys/netinet6/mld6.c	Wed Mar  1 09:09:37 2017
@@ -1,4 +1,4 @@
-/*	$NetBSD: mld6.c,v 1.84 2017/03/01 08:54:12 ozaki-r Exp $	*/
+/*	$NetBSD: mld6.c,v 1.85 2017/03/01 09:09:37 ozaki-r Exp $	*/
 /*	$KAME: mld6.c,v 1.25 2001/01/16 14:14:18 itojun Exp $	*/
 
 /*
@@ -102,7 +102,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: mld6.c,v 1.84 2017/03/01 08:54:12 ozaki-r Exp $");
+__KERNEL_RCSID(0, "$NetBSD: mld6.c,v 1.85 2017/03/01 09:09:37 ozaki-r Exp $");
 
 #ifdef _KERNEL_OPT
 #include "opt_inet.h"
@@ -119,6 +119,7 @@ __KERNEL_RCSID(0, "$NetBSD: mld6.c,v 1.8
 #include <sys/kernel.h>
 #include <sys/callout.h>
 #include <sys/cprng.h>
+#include <sys/rwlock.h>
 
 #include <net/if.h>
 
@@ -135,6 +136,8 @@ __KERNEL_RCSID(0, "$NetBSD: mld6.c,v 1.8
 #include <net/net_osdep.h>
 
 
+static krwlock_t	in6_multilock __cacheline_aligned;
+
 /*
  * Protocol constants
  */
@@ -157,6 +160,10 @@ static void mld_starttimer(struct in6_mu
 static void mld_stoptimer(struct in6_multi *);
 static u_long mld_timerresid(struct in6_multi *);
 
+static void in6m_ref(struct in6_multi *);
+static void in6m_unref(struct in6_multi *);
+static void in6m_destroy(struct in6_multi *);
+
 void
 mld_init(void)
 {
@@ -178,6 +185,8 @@ mld_init(void)
 	/* We will specify the hoplimit by a multicast option. */
 	ip6_opts.ip6po_hlim = -1;
 	ip6_opts.ip6po_prefer_tempaddr = IP6PO_TEMPADDR_NOTPREFER;
+
+	rw_init(&in6_multilock);
 }
 
 static void
@@ -185,6 +194,7 @@ mld_starttimer(struct in6_multi *in6m)
 {
 	struct timeval now;
 
+	KASSERT(rw_write_held(&in6_multilock));
 	KASSERT(in6m->in6m_timer != IN6M_TIMER_UNDEF);
 
 	microtime(&now);
@@ -200,13 +210,27 @@ mld_starttimer(struct in6_multi *in6m)
 	callout_schedule(&in6m->in6m_timer_ch, in6m->in6m_timer);
 }
 
+/*
+ * mld_stoptimer releases in6_multilock when calling callout_halt.
+ * The caller must ensure in6m won't be freed while releasing the lock.
+ */
 static void
 mld_stoptimer(struct in6_multi *in6m)
 {
+
+	KASSERT(rw_write_held(&in6_multilock));
+
 	if (in6m->in6m_timer == IN6M_TIMER_UNDEF)
 		return;
 
-	callout_stop(&in6m->in6m_timer_ch);
+	rw_exit(&in6_multilock);
+
+	if (mutex_owned(softnet_lock))
+		callout_halt(&in6m->in6m_timer_ch, softnet_lock);
+	else
+		callout_halt(&in6m->in6m_timer_ch, NULL);
+
+	rw_enter(&in6_multilock, RW_WRITER);
 
 	in6m->in6m_timer = IN6M_TIMER_UNDEF;
 }
@@ -216,10 +240,13 @@ mld_timeo(void *arg)
 {
 	struct in6_multi *in6m = arg;
 
-	/* XXX NOMPSAFE still need softnet_lock */
+	KASSERT(in6m->in6m_refcount > 0);
+
+#ifndef NET_MPSAFE
 	mutex_enter(softnet_lock);
 	KERNEL_LOCK(1, NULL);
-
+#endif
+	rw_enter(&in6_multilock, RW_WRITER);
 	if (in6m->in6m_timer == IN6M_TIMER_UNDEF)
 		goto out;
 
@@ -235,8 +262,13 @@ mld_timeo(void *arg)
 	}
 
 out:
+	rw_exit(&in6_multilock);
+#ifndef NET_MPSAFE
 	KERNEL_UNLOCK_ONE(NULL);
 	mutex_exit(softnet_lock);
+#else
+	return;
+#endif
 }
 
 static u_long
@@ -268,6 +300,8 @@ mld_start_listening(struct in6_multi *in
 {
 	struct in6_addr all_in6;
 
+	KASSERT(rw_write_held(&in6_multilock));
+
 	/*
 	 * RFC2710 page 10:
 	 * The node never sends a Report or Done for the link-scope all-nodes
@@ -300,6 +334,8 @@ mld_stop_listening(struct in6_multi *in6
 {
 	struct in6_addr allnode, allrouter;
 
+	KASSERT(rw_lock_held(&in6_multilock));
+
 	allnode = in6addr_linklocal_allnodes;
 	if (in6_setscope(&allnode, in6m->in6m_ifp, NULL)) {
 		/* XXX: this should not happen! */
@@ -395,6 +431,8 @@ mld_input(struct mbuf *m, int off)
 	 */
 	switch (mldh->mld_type) {
 	case MLD_LISTENER_QUERY: {
+		struct in6_multi *next;
+
 		if (ifp->if_flags & IFF_LOOPBACK)
 			break;
 
@@ -420,7 +458,17 @@ mld_input(struct mbuf *m, int off)
 		 */
 		timer = ntohs(mldh->mld_maxdelay);
 
-		LIST_FOREACH(in6m, &ifp->if_multiaddrs, in6m_entry) {
+		rw_enter(&in6_multilock, RW_WRITER);
+		/*
+		 * mld_stoptimer and mld_sendpkt release in6_multilock
+		 * temporarily, so we have to prevent in6m from being freed
+		 * while releasing the lock by having an extra reference to it.
+		 *
+		 * Also in6_purge_multi might remove items from the list of the
+		 * ifp while releasing the lock. Fortunately in6_purge_multi is
+		 * never executed as long as we have a psref of the ifp.
+		 */
+		LIST_FOREACH_SAFE(in6m, &ifp->if_multiaddrs, in6m_entry, next) {
 			if (IN6_ARE_ADDR_EQUAL(&in6m->in6m_addr, &all_in6) ||
 			    IPV6_ADDR_MC_SCOPE(&in6m->in6m_addr) <
 			    IPV6_ADDR_SCOPE_LINKLOCAL)
@@ -434,10 +482,14 @@ mld_input(struct mbuf *m, int off)
 				continue;
 
 			if (timer == 0) {
+				in6m_ref(in6m);
+
 				/* send a report immediately */
 				mld_stoptimer(in6m);
 				mld_sendpkt(in6m, MLD_LISTENER_REPORT, NULL);
 				in6m->in6m_state = MLD_IREPORTEDLAST;
+
+				in6m_unref(in6m); /* May free in6m */
 			} else if (in6m->in6m_timer == IN6M_TIMER_UNDEF ||
 			    mld_timerresid(in6m) > timer) {
 				in6m->in6m_timer =
@@ -445,6 +497,7 @@ mld_input(struct mbuf *m, int off)
 				mld_starttimer(in6m);
 			}
 		}
+		rw_exit(&in6_multilock);
 		break;
 	    }
 
@@ -468,11 +521,16 @@ mld_input(struct mbuf *m, int off)
 		 * If we belong to the group being reported, stop
 		 * our timer for that group.
 		 */
+		rw_enter(&in6_multilock, RW_WRITER);
 		in6m = in6_lookup_multi(&mld_addr, ifp);
 		if (in6m) {
+			in6m_ref(in6m);
 			mld_stoptimer(in6m); /* transit to idle state */
 			in6m->in6m_state = MLD_OTHERLISTENER; /* clear flag */
+			in6m_unref(in6m);
+			in6m = NULL; /* in6m might be freed */
 		}
+		rw_exit(&in6_multilock);
 		break;
 	default:		/* this is impossible */
 #if 0
@@ -492,6 +550,11 @@ out_nodrop:
 	m_put_rcvif_psref(ifp, &psref);
 }
 
+/*
+ * XXX mld_sendpkt must be called with in6_multilock held and
+ * will release in6_multilock before calling ip6_output and
+ * returning to avoid locking against myself in ip6_output.
+ */
 static void
 mld_sendpkt(struct in6_multi *in6m, int type, 
 	const struct in6_addr *dst)
@@ -506,6 +569,8 @@ mld_sendpkt(struct in6_multi *in6m, int 
 	struct psref psref;
 	int bound;
 
+	KASSERT(rw_write_held(&in6_multilock));
+
 	/*
 	 * At first, find a link local address on the outgoing interface
 	 * to use as the source address of the MLD packet.
@@ -570,8 +635,13 @@ mld_sendpkt(struct in6_multi *in6m, int 
 		break;
 	}
 
+	/* XXX we cannot call ip6_output with holding in6_multilock */
+	rw_exit(&in6_multilock);
+
 	ip6_output(mh, &ip6_opts, NULL, ia ? 0 : IPV6_UNSPECSRC,
 	    &im6o, NULL, NULL);
+
+	rw_enter(&in6_multilock, RW_WRITER);
 }
 
 static struct mld_hdr *
@@ -623,6 +693,23 @@ mld_allocbuf(struct mbuf **mh, int len, 
 	return mldh;
 }
 
+static void
+in6m_ref(struct in6_multi *in6m)
+{
+
+	KASSERT(rw_write_held(&in6_multilock));
+	in6m->in6m_refcount++;
+}
+
+static void
+in6m_unref(struct in6_multi *in6m)
+{
+
+	KASSERT(rw_write_held(&in6_multilock));
+	if (--in6m->in6m_refcount == 0)
+		in6m_destroy(in6m);
+}
+
 /*
  * Add an address to the list of IP6 multicast addresses for a given interface.
  */
@@ -632,10 +719,10 @@ in6_addmulti(struct in6_addr *maddr6, st
 {
 	struct	sockaddr_in6 sin6;
 	struct	in6_multi *in6m;
-	int	s = splsoftnet();
 
 	*errorp = 0;
 
+	rw_enter(&in6_multilock, RW_WRITER);
 	/*
 	 * See if address already in list.
 	 */
@@ -653,9 +740,8 @@ in6_addmulti(struct in6_addr *maddr6, st
 		in6m = (struct in6_multi *)
 			malloc(sizeof(*in6m), M_IPMADDR, M_NOWAIT|M_ZERO);
 		if (in6m == NULL) {
-			splx(s);
 			*errorp = ENOBUFS;
-			return (NULL);
+			goto out;
 		}
 
 		in6m->in6m_addr = *maddr6;
@@ -665,7 +751,6 @@ in6_addmulti(struct in6_addr *maddr6, st
 		callout_init(&in6m->in6m_timer_ch, CALLOUT_MPSAFE);
 		callout_setfunc(&in6m->in6m_timer_ch, mld_timeo, in6m);
 
-		/* FIXME NOMPSAFE: need to lock */
 		LIST_INSERT_HEAD(&ifp->if_multiaddrs, in6m, in6m_entry);
 
 		/*
@@ -678,17 +763,15 @@ in6_addmulti(struct in6_addr *maddr6, st
 			callout_destroy(&in6m->in6m_timer_ch);
 			LIST_REMOVE(in6m, in6m_entry);
 			free(in6m, M_IPMADDR);
-			splx(s);
-			return (NULL);
+			in6m = NULL;
+			goto out;
 		}
 
 		in6m->in6m_timer = timer;
 		if (in6m->in6m_timer > 0) {
 			in6m->in6m_state = MLD_REPORTPENDING;
 			mld_starttimer(in6m);
-
-			splx(s);
-			return (in6m);
+			goto out;
 		}
 
 		/*
@@ -697,69 +780,82 @@ in6_addmulti(struct in6_addr *maddr6, st
 		 */
 		mld_start_listening(in6m);
 	}
-	splx(s);
-	return (in6m);
+out:
+	rw_exit(&in6_multilock);
+	return in6m;
 }
 
-/*
- * Delete a multicast address record.
- */
-void
-in6_delmulti(struct in6_multi *in6m)
+static void
+in6m_destroy(struct in6_multi *in6m)
 {
-	struct	sockaddr_in6 sin6;
 	struct	in6_ifaddr *ia;
-	int	s = splsoftnet();
-
-	mld_stoptimer(in6m);
+	struct sockaddr_in6 sin6;
+	int s;
 
-	if (--in6m->in6m_refcount == 0) {
-		int _s;
+	KASSERT(rw_write_held(&in6_multilock));
+	KASSERT(in6m->in6m_refcount == 0);
 
-		/*
-		 * No remaining claims to this record; let MLD6 know
-		 * that we are leaving the multicast group.
-		 */
-		mld_stop_listening(in6m);
+	/*
+	 * No remaining claims to this record; let MLD6 know
+	 * that we are leaving the multicast group.
+	 */
+	mld_stop_listening(in6m);
 
-		/*
-		 * Unlink from list.
-		 */
-		/* FIXME NOMPSAFE: need to lock */
-		LIST_REMOVE(in6m, in6m_entry);
+	/*
+	 * Unlink from list.
+	 */
+	LIST_REMOVE(in6m, in6m_entry);
 
-		/*
-		 * Delete all references of this multicasting group from
-		 * the membership arrays
-		 */
-		_s = pserialize_read_enter();
-		IN6_ADDRLIST_READER_FOREACH(ia) {
-			struct in6_multi_mship *imm;
-			LIST_FOREACH(imm, &ia->ia6_memberships, i6mm_chain) {
-				if (imm->i6mm_maddr == in6m)
-					imm->i6mm_maddr = NULL;
-			}
+	/*
+	 * Delete all references of this multicasting group from
+	 * the membership arrays
+	 */
+	s = pserialize_read_enter();
+	IN6_ADDRLIST_READER_FOREACH(ia) {
+		struct in6_multi_mship *imm;
+		LIST_FOREACH(imm, &ia->ia6_memberships, i6mm_chain) {
+			if (imm->i6mm_maddr == in6m)
+				imm->i6mm_maddr = NULL;
 		}
-		pserialize_read_exit(_s);
+	}
+	pserialize_read_exit(s);
 
-		/*
-		 * Notify the network driver to update its multicast
-		 * reception filter.
-		 */
-		sockaddr_in6_init(&sin6, &in6m->in6m_addr, 0, 0, 0);
-		if_mcast_op(in6m->in6m_ifp, SIOCDELMULTI, sin6tosa(&sin6));
+	/*
+	 * Notify the network driver to update its multicast
+	 * reception filter.
+	 */
+	sockaddr_in6_init(&sin6, &in6m->in6m_addr, 0, 0, 0);
+	if_mcast_op(in6m->in6m_ifp, SIOCDELMULTI, sin6tosa(&sin6));
 
-		/* Tell mld_timeo we're halting the timer */
-		in6m->in6m_timer = IN6M_TIMER_UNDEF;
-		if (mutex_owned(softnet_lock))
-			callout_halt(&in6m->in6m_timer_ch, softnet_lock);
-		else
-			callout_halt(&in6m->in6m_timer_ch, NULL);
-		callout_destroy(&in6m->in6m_timer_ch);
+	/* Tell mld_timeo we're halting the timer */
+	in6m->in6m_timer = IN6M_TIMER_UNDEF;
+	if (mutex_owned(softnet_lock))
+		callout_halt(&in6m->in6m_timer_ch, softnet_lock);
+	else
+		callout_halt(&in6m->in6m_timer_ch, NULL);
+	callout_destroy(&in6m->in6m_timer_ch);
 
-		free(in6m, M_IPMADDR);
-	}
-	splx(s);
+	free(in6m, M_IPMADDR);
+}
+
+/*
+ * Delete a multicast address record.
+ */
+void
+in6_delmulti(struct in6_multi *in6m)
+{
+
+	KASSERT(in6m->in6m_refcount > 0);
+
+	rw_enter(&in6_multilock, RW_WRITER);
+	/*
+	 * The caller should have a reference to in6m. So we don't need to care
+	 * of releasing the lock in mld_stoptimer.
+	 */
+	mld_stoptimer(in6m);
+	if (--in6m->in6m_refcount == 0)
+		in6m_destroy(in6m);
+	rw_exit(&in6_multilock);
 }
 
 /*
@@ -772,7 +868,8 @@ in6_lookup_multi(const struct in6_addr *
 {
 	struct in6_multi *in6m;
 
-	/* XXX NOMPSAFE */
+	KASSERT(rw_lock_held(&in6_multilock));
+
 	LIST_FOREACH(in6m, &ifp->if_multiaddrs, in6m_entry) {
 		if (IN6_ARE_ADDR_EQUAL(&in6m->in6m_addr, addr))
 			break;
@@ -785,7 +882,9 @@ in6_multi_group(const struct in6_addr *a
 {
 	bool ingroup;
 
+	rw_enter(&in6_multilock, RW_READER);
 	ingroup = in6_lookup_multi(addr, ifp) != NULL;
+	rw_exit(&in6_multilock);
 
 	return ingroup;
 }
@@ -798,10 +897,18 @@ in6_purge_multi(struct ifnet *ifp)
 {
 	struct in6_multi *in6m, *next;
 
-	/* XXX NOMPSAFE */
+	rw_enter(&in6_multilock, RW_WRITER);
 	LIST_FOREACH_SAFE(in6m, &ifp->if_multiaddrs, in6m_entry, next) {
-		in6_delmulti(in6m);
+		/*
+		 * Normally multicast addresses are already purged at this
+		 * point. Remaining references aren't accessible via ifp,
+		 * so what we can do here is to prevent ifp from being
+		 * accessed via in6m by removing it from the list of ifp.
+		 */
+		mld_stoptimer(in6m);
+		LIST_REMOVE(in6m, in6m_entry);
 	}
+	rw_exit(&in6_multilock);
 }
 
 struct in6_multi_mship *
@@ -865,10 +972,13 @@ in6_multicast_sysctl(SYSCTLFN_ARGS)
 	if (namelen != 1)
 		return EINVAL;
 
+	rw_enter(&in6_multilock, RW_READER);
+
 	bound = curlwp_bind();
 	ifp = if_get_byindex(name[0], &psref);
 	if (ifp == NULL) {
 		curlwp_bindx(bound);
+		rw_exit(&in6_multilock);
 		return ENODEV;
 	}
 
@@ -884,6 +994,7 @@ in6_multicast_sysctl(SYSCTLFN_ARGS)
 		pserialize_read_exit(s);
 		if_put(ifp, &psref);
 		curlwp_bindx(bound);
+		rw_exit(&in6_multilock);
 		return 0;
 	}
 
@@ -938,6 +1049,7 @@ done:
 	ifa_release(ifa, &psref_ia);
 	if_put(ifp, &psref);
 	curlwp_bindx(bound);
+	rw_exit(&in6_multilock);
 	*oldlenp = written;
 	return error;
 }

Reply via email to