Module Name:    src
Committed By:   rmind
Date:           Thu May 29 23:02:48 UTC 2014

Modified Files:
        src/sys/netinet: igmp.c igmp.h igmp_var.h in.c in_var.h ip_carp.c
            ip_input.c ip_output.c ip_var.h

Log Message:
Make IGMP and multicast group management code MP-safe.  Use a read-write
lock to protect the hash table of multicast address records; also, make it
private and eliminate some macros.  In the long term, the lookup path ought
to be optimised.


To generate a diff of this commit:
cvs rdiff -u -r1.54 -r1.55 src/sys/netinet/igmp.c
cvs rdiff -u -r1.11 -r1.12 src/sys/netinet/igmp.h
cvs rdiff -u -r1.23 -r1.24 src/sys/netinet/igmp_var.h
cvs rdiff -u -r1.145 -r1.146 src/sys/netinet/in.c
cvs rdiff -u -r1.67 -r1.68 src/sys/netinet/in_var.h
cvs rdiff -u -r1.55 -r1.56 src/sys/netinet/ip_carp.c
cvs rdiff -u -r1.315 -r1.316 src/sys/netinet/ip_input.c
cvs rdiff -u -r1.227 -r1.228 src/sys/netinet/ip_output.c
cvs rdiff -u -r1.103 -r1.104 src/sys/netinet/ip_var.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/netinet/igmp.c
diff -u src/sys/netinet/igmp.c:1.54 src/sys/netinet/igmp.c:1.55
--- src/sys/netinet/igmp.c:1.54	Tue Feb 25 18:30:12 2014
+++ src/sys/netinet/igmp.c	Thu May 29 23:02:48 2014
@@ -1,4 +1,4 @@
-/*	$NetBSD: igmp.c,v 1.54 2014/02/25 18:30:12 pooka Exp $	*/
+/*	$NetBSD: igmp.c,v 1.55 2014/05/29 23:02:48 rmind Exp $	*/
 
 /*
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
@@ -40,7 +40,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: igmp.c,v 1.54 2014/02/25 18:30:12 pooka Exp $");
+__KERNEL_RCSID(0, "$NetBSD: igmp.c,v 1.55 2014/05/29 23:02:48 rmind Exp $");
 
 #include "opt_mrouting.h"
 
@@ -50,6 +50,7 @@ __KERNEL_RCSID(0, "$NetBSD: igmp.c,v 1.5
 #include <sys/socketvar.h>
 #include <sys/protosw.h>
 #include <sys/systm.h>
+#include <sys/cprng.h>
 #include <sys/sysctl.h>
 
 #include <net/if.h>
@@ -64,83 +65,107 @@ __KERNEL_RCSID(0, "$NetBSD: igmp.c,v 1.5
 #include <netinet/igmp.h>
 #include <netinet/igmp_var.h>
 
-#define IP_MULTICASTOPTS	0
-
-static struct pool igmp_rti_pool;
+/*
+ * Per-interface router version information.
+ */
+typedef struct router_info {
+	LIST_ENTRY(router_info) rti_link;
+	ifnet_t *	rti_ifp;
+	int		rti_type;	/* type of router on this interface */
+	int		rti_age;	/* time since last v1 query */
+} router_info_t;
 
-static percpu_t *igmpstat_percpu;
+/*
+ * The router-info list and the timer flag are protected by in_multilock.
+ *
+ * Lock order:
+ *
+ *	softnet_lock ->
+ *		in_multilock
+ */
+static struct pool	igmp_rti_pool		__cacheline_aligned;
+static LIST_HEAD(, router_info)	rti_head	__cacheline_aligned;
+static int		igmp_timers_on		__cacheline_aligned;
+static percpu_t *	igmpstat_percpu		__read_mostly;
 
 #define	IGMP_STATINC(x)		_NET_STATINC(igmpstat_percpu, x)
 
-int igmp_timers_are_running;
-static LIST_HEAD(, router_info) rti_head = LIST_HEAD_INITIALIZER(rti_head);
-
-void igmp_sendpkt(struct in_multi *, int);
-static int rti_fill(struct in_multi *);
-static struct router_info *rti_find(struct ifnet *);
-static void rti_delete(struct ifnet *);
-
-static void sysctl_net_inet_igmp_setup(struct sysctllog **);
+static void		igmp_sendpkt(struct in_multi *, int);
+static int		rti_fill(struct in_multi *);
+static router_info_t *	rti_find(struct ifnet *);
+static void		rti_delete(struct ifnet *);
+static void		sysctl_net_inet_igmp_setup(struct sysctllog **);
 
+/*
+ * rti_fill: associate router information with the given multicast group;
+ * if there is no router information for the interface, then create it.
+ */
 static int
 rti_fill(struct in_multi *inm)
 {
-	struct router_info *rti;
+	router_info_t *rti;
+
+	KASSERT(in_multi_lock_held());
 
-	/* this function is called at splsoftnet() */
 	LIST_FOREACH(rti, &rti_head, rti_link) {
 		if (rti->rti_ifp == inm->inm_ifp) {
 			inm->inm_rti = rti;
-			if (rti->rti_type == IGMP_v1_ROUTER)
-				return (IGMP_v1_HOST_MEMBERSHIP_REPORT);
-			else
-				return (IGMP_v2_HOST_MEMBERSHIP_REPORT);
+			return rti->rti_type == IGMP_v1_ROUTER ?
+			    IGMP_v1_HOST_MEMBERSHIP_REPORT :
+			    IGMP_v2_HOST_MEMBERSHIP_REPORT;
 		}
 	}
-
 	rti = pool_get(&igmp_rti_pool, PR_NOWAIT);
-	if (rti == NULL)
+	if (rti == NULL) {
 		return 0;
+	}
 	rti->rti_ifp = inm->inm_ifp;
 	rti->rti_type = IGMP_v2_ROUTER;
 	LIST_INSERT_HEAD(&rti_head, rti, rti_link);
 	inm->inm_rti = rti;
-	return (IGMP_v2_HOST_MEMBERSHIP_REPORT);
+	return IGMP_v2_HOST_MEMBERSHIP_REPORT;
 }
 
-static struct router_info *
-rti_find(struct ifnet *ifp)
+/*
+ * rti_find: look up or create router information for the given interface.
+ */
+static router_info_t *
+rti_find(ifnet_t *ifp)
 {
-	struct router_info *rti;
-	int s = splsoftnet();
+	router_info_t *rti;
+
+	KASSERT(in_multi_lock_held());
 
 	LIST_FOREACH(rti, &rti_head, rti_link) {
 		if (rti->rti_ifp == ifp)
-			return (rti);
+			return rti;
 	}
-
 	rti = pool_get(&igmp_rti_pool, PR_NOWAIT);
 	if (rti == NULL) {
-		splx(s);
 		return NULL;
 	}
 	rti->rti_ifp = ifp;
 	rti->rti_type = IGMP_v2_ROUTER;
 	LIST_INSERT_HEAD(&rti_head, rti, rti_link);
-	splx(s);
-	return (rti);
+	return rti;
 }
 
+/*
+ * rti_delete: remove and free the router information entry for the
+ * given interface.
+ */
 static void
-rti_delete(struct ifnet *ifp)	/* MUST be called at splsoftnet */
+rti_delete(ifnet_t *ifp)
 {
-	struct router_info *rti;
+	router_info_t *rti;
+
+	KASSERT(in_multi_lock_held());
 
 	LIST_FOREACH(rti, &rti_head, rti_link) {
 		if (rti->rti_ifp == ifp) {
 			LIST_REMOVE(rti, rti_link);
 			pool_put(&igmp_rti_pool, rti);
-			return;
+			break;
 		}
 	}
 }
@@ -148,29 +173,24 @@ rti_delete(struct ifnet *ifp)	/* MUST be
 void
 igmp_init(void)
 {
-
-	sysctl_net_inet_igmp_setup(NULL);
-	pool_init(&igmp_rti_pool, sizeof(struct router_info), 0, 0, 0,
+	pool_init(&igmp_rti_pool, sizeof(router_info_t), 0, 0, 0,
 	    "igmppl", NULL, IPL_SOFTNET);
 	igmpstat_percpu = percpu_alloc(sizeof(uint64_t) * IGMP_NSTATS);
+	sysctl_net_inet_igmp_setup(NULL);
+	LIST_INIT(&rti_head);
 }
 
 void
 igmp_input(struct mbuf *m, ...)
 {
-	int proto;
-	int iphlen;
-	struct ifnet *ifp = m->m_pkthdr.rcvif;
+	ifnet_t *ifp = m->m_pkthdr.rcvif;
 	struct ip *ip = mtod(m, struct ip *);
 	struct igmp *igmp;
-	u_int minlen;
+	u_int minlen, timer;
 	struct in_multi *inm;
-	struct in_multistep step;
-	struct router_info *rti;
 	struct in_ifaddr *ia;
-	u_int timer;
+	int proto, ip_len, iphlen;
 	va_list ap;
-	u_int16_t ip_len;
 
 	va_start(ap, m);
 	iphlen = va_arg(ap, int);
@@ -222,11 +242,8 @@ igmp_input(struct mbuf *m, ...)
 			break;
 
 		if (igmp->igmp_code == 0) {
-			rti = rti_find(ifp);
-			if (rti == NULL)
-				break;
-			rti->rti_type = IGMP_v1_ROUTER;
-			rti->rti_age = 0;
+			struct in_multistep step;
+			router_info_t *rti;
 
 			if (ip->ip_dst.s_addr != INADDR_ALLHOSTS_GROUP) {
 				IGMP_STATINC(IGMP_STAT_RCV_BADQUERIES);
@@ -234,13 +251,23 @@ igmp_input(struct mbuf *m, ...)
 				return;
 			}
 
+			in_multi_lock(RW_WRITER);
+			rti = rti_find(ifp);
+			if (rti == NULL) {
+				in_multi_unlock();
+				break;
+			}
+			rti->rti_type = IGMP_v1_ROUTER;
+			rti->rti_age = 0;
+
 			/*
 			 * Start the timers in all of our membership records
 			 * for the interface on which the query arrived,
 			 * except those that are already running and those
 			 * that belong to a "local" group (224.0.0.X).
 			 */
-			IN_FIRST_MULTI(step, inm);
+
+			inm = in_first_multi(&step);
 			while (inm != NULL) {
 				if (inm->inm_ifp == ifp &&
 				    inm->inm_timer == 0 &&
@@ -248,11 +275,14 @@ igmp_input(struct mbuf *m, ...)
 					inm->inm_state = IGMP_DELAYING_MEMBER;
 					inm->inm_timer = IGMP_RANDOM_DELAY(
 					    IGMP_MAX_HOST_REPORT_DELAY * PR_FASTHZ);
-					igmp_timers_are_running = 1;
+					igmp_timers_on = true;
 				}
-				IN_NEXT_MULTI(step, inm);
+				inm = in_next_multi(&step);
 			}
+			in_multi_unlock();
 		} else {
+			struct in_multistep step;
+
 			if (!IN_MULTICAST(ip->ip_dst.s_addr)) {
 				IGMP_STATINC(IGMP_STAT_RCV_BADQUERIES);
 				m_freem(m);
@@ -261,7 +291,7 @@ igmp_input(struct mbuf *m, ...)
 
 			timer = igmp->igmp_code * PR_FASTHZ / IGMP_TIMER_SCALE;
 			if (timer == 0)
-				timer =1;
+				timer = 1;
 
 			/*
 			 * Start the timers in all of our membership records
@@ -271,7 +301,8 @@ igmp_input(struct mbuf *m, ...)
 			 * timers already running, check if they need to be
 			 * reset.
 			 */
-			IN_FIRST_MULTI(step, inm);
+			in_multi_lock(RW_WRITER);
+			inm = in_first_multi(&step);
 			while (inm != NULL) {
 				if (inm->inm_ifp == ifp &&
 				    !IN_LOCAL_GROUP(inm->inm_addr.s_addr) &&
@@ -289,7 +320,7 @@ igmp_input(struct mbuf *m, ...)
 						    IGMP_DELAYING_MEMBER;
 						inm->inm_timer =
 						    IGMP_RANDOM_DELAY(timer);
-						igmp_timers_are_running = 1;
+						igmp_timers_on = true;
 						break;
 					case IGMP_SLEEPING_MEMBER:
 						inm->inm_state =
@@ -297,8 +328,9 @@ igmp_input(struct mbuf *m, ...)
 						break;
 					}
 				}
-				IN_NEXT_MULTI(step, inm);
+				inm = in_next_multi(&step);
 			}
+			in_multi_unlock();
 		}
 
 		break;
@@ -335,7 +367,8 @@ igmp_input(struct mbuf *m, ...)
 		 * If we belong to the group being reported, stop
 		 * our timer for that group.
 		 */
-		IN_LOOKUP_MULTI(igmp->igmp_group, ifp, inm);
+		in_multi_lock(RW_WRITER);
+		inm = in_lookup_multi(igmp->igmp_group, ifp);
 		if (inm != NULL) {
 			inm->inm_timer = 0;
 			IGMP_STATINC(IGMP_STAT_RCV_OURREPORTS);
@@ -355,7 +388,7 @@ igmp_input(struct mbuf *m, ...)
 				break;
 			}
 		}
-
+		in_multi_unlock();
 		break;
 
 	case IGMP_v2_HOST_MEMBERSHIP_REPORT:
@@ -403,7 +436,8 @@ igmp_input(struct mbuf *m, ...)
 		 * If we belong to the group being reported, stop
 		 * our timer for that group.
 		 */
-		IN_LOOKUP_MULTI(igmp->igmp_group, ifp, inm);
+		in_multi_lock(RW_WRITER);
+		inm = in_lookup_multi(igmp->igmp_group, ifp);
 		if (inm != NULL) {
 			inm->inm_timer = 0;
 			IGMP_STATINC(IGMP_STAT_RCV_OURREPORTS);
@@ -419,7 +453,7 @@ igmp_input(struct mbuf *m, ...)
 				break;
 			}
 		}
-
+		in_multi_unlock();
 		break;
 
 	}
@@ -435,32 +469,32 @@ igmp_input(struct mbuf *m, ...)
 int
 igmp_joingroup(struct in_multi *inm)
 {
-	int report_type;
-	int s = splsoftnet();
-
+	KASSERT(in_multi_lock_held());
 	inm->inm_state = IGMP_IDLE_MEMBER;
 
 	if (!IN_LOCAL_GROUP(inm->inm_addr.s_addr) &&
 	    (inm->inm_ifp->if_flags & IFF_LOOPBACK) == 0) {
+		int report_type;
+
 		report_type = rti_fill(inm);
 		if (report_type == 0) {
-			splx(s);
 			return ENOMEM;
 		}
 		igmp_sendpkt(inm, report_type);
 		inm->inm_state = IGMP_DELAYING_MEMBER;
 		inm->inm_timer = IGMP_RANDOM_DELAY(
 		    IGMP_MAX_HOST_REPORT_DELAY * PR_FASTHZ);
-		igmp_timers_are_running = 1;
+		igmp_timers_on = true;
 	} else
 		inm->inm_timer = 0;
-	splx(s);
+
 	return 0;
 }
 
 void
 igmp_leavegroup(struct in_multi *inm)
 {
+	KASSERT(in_multi_lock_held());
 
 	switch (inm->inm_state) {
 	case IGMP_DELAYING_MEMBER:
@@ -487,14 +521,16 @@ igmp_fasttimo(void)
 	 * Quick check to see if any work needs to be done, in order
 	 * to minimize the overhead of fasttimo processing.
 	 */
-	if (!igmp_timers_are_running)
+	if (!igmp_timers_on) {
 		return;
+	}
 
+	/* XXX: Needed for ip_output(). */
 	mutex_enter(softnet_lock);
-	KERNEL_LOCK(1, NULL);
 
-	igmp_timers_are_running = 0;
-	IN_FIRST_MULTI(step, inm);
+	in_multi_lock(RW_WRITER);
+	igmp_timers_on = false;
+	inm = in_first_multi(&step);
 	while (inm != NULL) {
 		if (inm->inm_timer == 0) {
 			/* do nothing */
@@ -509,46 +545,47 @@ igmp_fasttimo(void)
 				inm->inm_state = IGMP_IDLE_MEMBER;
 			}
 		} else {
-			igmp_timers_are_running = 1;
+			igmp_timers_on = true;
 		}
-		IN_NEXT_MULTI(step, inm);
+		inm = in_next_multi(&step);
 	}
-
-	KERNEL_UNLOCK_ONE(NULL);
+	in_multi_unlock();
 	mutex_exit(softnet_lock);
 }
 
 void
 igmp_slowtimo(void)
 {
-	struct router_info *rti;
+	router_info_t *rti;
 
-	mutex_enter(softnet_lock);
-	KERNEL_LOCK(1, NULL);
+	in_multi_lock(RW_WRITER);
 	LIST_FOREACH(rti, &rti_head, rti_link) {
 		if (rti->rti_type == IGMP_v1_ROUTER &&
 		    ++rti->rti_age >= IGMP_AGE_THRESHOLD) {
 			rti->rti_type = IGMP_v2_ROUTER;
 		}
 	}
-	KERNEL_UNLOCK_ONE(NULL);
-	mutex_exit(softnet_lock);
+	in_multi_unlock();
 }
 
-void
+/*
+ * igmp_sendpkt: construct an IGMP packet, given the multicast structure
+ * and the type, and send the datagram.
+ */
+static void
 igmp_sendpkt(struct in_multi *inm, int type)
 {
 	struct mbuf *m;
 	struct igmp *igmp;
 	struct ip *ip;
 	struct ip_moptions imo;
-#ifdef MROUTING
-	extern struct socket *ip_mrouter;
-#endif /* MROUTING */
+
+	KASSERT(in_multi_lock_held());
 
 	MGETHDR(m, M_DONTWAIT, MT_HEADER);
 	if (m == NULL)
 		return;
+
 	/*
 	 * Assume max_linkhdr + sizeof(struct ip) + IGMP_MINLEN
 	 * is smaller than mbuf size returned by MGETHDR.
@@ -586,33 +623,38 @@ igmp_sendpkt(struct in_multi *inm, int t
 	 * router, so that the process-level routing demon can hear it.
 	 */
 #ifdef MROUTING
+	extern struct socket *ip_mrouter;
 	imo.imo_multicast_loop = (ip_mrouter != NULL);
 #else
 	imo.imo_multicast_loop = 0;
-#endif /* MROUTING */
-
-	ip_output(m, NULL, NULL, IP_MULTICASTOPTS, &imo, NULL);
+#endif
 
+	/*
+	 * Note: IP_IGMP_MCAST indicates that in_multilock is held.
+	 * The caller must still acquire softnet_lock for ip_output().
+	 */
+	KASSERT(mutex_owned(softnet_lock));
+	ip_output(m, NULL, NULL, IP_IGMP_MCAST, &imo, NULL);
 	IGMP_STATINC(IGMP_STAT_SND_REPORTS);
 }
 
 void
-igmp_purgeif(struct ifnet *ifp)	/* MUST be called at splsoftnet() */
+igmp_purgeif(ifnet_t *ifp)
 {
-	rti_delete(ifp);	/* manipulates pools */
+	in_multi_lock(RW_WRITER);
+	rti_delete(ifp);
+	in_multi_unlock();
 }
 
 static int
 sysctl_net_inet_igmp_stats(SYSCTLFN_ARGS)
 {
-
-	return (NETSTAT_SYSCTL(igmpstat_percpu, IGMP_NSTATS));
+	return NETSTAT_SYSCTL(igmpstat_percpu, IGMP_NSTATS);
 }
 
 static void
 sysctl_net_inet_igmp_setup(struct sysctllog **clog)
 {
-
 	sysctl_createv(clog, 0, NULL, NULL,
 			CTLFLAG_PERMANENT,
 			CTLTYPE_NODE, "inet", NULL,
@@ -624,7 +666,6 @@ sysctl_net_inet_igmp_setup(struct sysctl
 			SYSCTL_DESCR("Internet Group Management Protocol"),
 			NULL, 0, NULL, 0,
 			CTL_NET, PF_INET, IPPROTO_IGMP, CTL_EOL);
-	
 	sysctl_createv(clog, 0, NULL, NULL,
 			CTLFLAG_PERMANENT,
 			CTLTYPE_STRUCT, "stats",

Index: src/sys/netinet/igmp.h
diff -u src/sys/netinet/igmp.h:1.11 src/sys/netinet/igmp.h:1.12
--- src/sys/netinet/igmp.h:1.11	Tue Dec 25 18:33:46 2007
+++ src/sys/netinet/igmp.h	Thu May 29 23:02:48 2014
@@ -1,4 +1,4 @@
-/*	$NetBSD: igmp.h,v 1.11 2007/12/25 18:33:46 perry Exp $	*/
+/*	$NetBSD: igmp.h,v 1.12 2014/05/29 23:02:48 rmind Exp $	*/
 
 /*
  * Copyright (c) 1992, 1993
@@ -84,13 +84,13 @@
  * IGMP packet format.
  */
 struct igmp {
-	u_int8_t	igmp_type;	/* version & type of IGMP message  */
-	u_int8_t	igmp_code;	/* code for routing sub-messages   */
-	u_int16_t	igmp_cksum;	/* IP-style checksum               */
+	uint8_t		igmp_type;	/* version & type of IGMP message  */
+	uint8_t		igmp_code;	/* code for routing sub-messages   */
+	uint16_t	igmp_cksum;	/* IP-style checksum               */
 	struct in_addr	igmp_group;	/* group address being reported    */
 } __packed;		/*  (zero for queries)             */
 
-#define	IGMP_MINLEN		     8
+#define	IGMP_MINLEN			8
 
 #define	IGMP_HOST_MEMBERSHIP_QUERY	0x11  /* membership query      */
 #define	IGMP_v1_HOST_MEMBERSHIP_REPORT	0x12  /* v1 membership report  */

Index: src/sys/netinet/igmp_var.h
diff -u src/sys/netinet/igmp_var.h:1.23 src/sys/netinet/igmp_var.h:1.24
--- src/sys/netinet/igmp_var.h:1.23	Tue Apr 15 16:02:03 2008
+++ src/sys/netinet/igmp_var.h	Thu May 29 23:02:48 2014
@@ -1,4 +1,4 @@
-/*	$NetBSD: igmp_var.h,v 1.23 2008/04/15 16:02:03 thorpej Exp $	*/
+/*	$NetBSD: igmp_var.h,v 1.24 2014/05/29 23:02:48 rmind Exp $	*/
 
 /*
  * Copyright (c) 1992, 1993
@@ -103,7 +103,7 @@
  * DELAY * countdown frequency).  We assume that the routine random()
  * is defined somewhere (and that it returns a positive number).
  */
-#define	IGMP_RANDOM_DELAY(X)	(random() % (X) + 1)
+#define	IGMP_RANDOM_DELAY(X)	(cprng_fast32() % (X) + 1)
 
 #ifdef __NO_STRICT_ALIGNMENT
 #define	IGMP_HDR_ALIGNED_P(ig)	1

Index: src/sys/netinet/in.c
diff -u src/sys/netinet/in.c:1.145 src/sys/netinet/in.c:1.146
--- src/sys/netinet/in.c:1.145	Thu May 22 22:01:12 2014
+++ src/sys/netinet/in.c	Thu May 29 23:02:48 2014
@@ -1,4 +1,4 @@
-/*	$NetBSD: in.c,v 1.145 2014/05/22 22:01:12 rmind Exp $	*/
+/*	$NetBSD: in.c,v 1.146 2014/05/29 23:02:48 rmind Exp $	*/
 
 /*
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
@@ -91,7 +91,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: in.c,v 1.145 2014/05/22 22:01:12 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: in.c,v 1.146 2014/05/29 23:02:48 rmind Exp $");
 
 #include "opt_inet.h"
 #include "opt_inet_conf.h"
@@ -149,6 +149,11 @@ static void	in_sysctl_init(struct sysctl
 #define HOSTZEROBROADCAST 1
 #endif
 
+/* Note: 61, 127, 251, 509, 1021, 2039 are good. */
+#ifndef IN_MULTI_HASH_SIZE
+#define IN_MULTI_HASH_SIZE	509
+#endif
+
 static int			subnetsarelocal = SUBNETSARELOCAL;
 static int			hostzeroisbroadcast = HOSTZEROBROADCAST;
 
@@ -157,13 +162,18 @@ static int			hostzeroisbroadcast = HOSTZ
  * deleted interface addresses.  We use in_ifaddr so that a chain head
  * won't be deallocated until all multicast address record are deleted.
  */
-static TAILQ_HEAD(, in_ifaddr)	in_mk = TAILQ_HEAD_INITIALIZER(in_mk);
+
+LIST_HEAD(in_multihashhead, in_multi);		/* Type of the hash head */
 
 static struct pool		inmulti_pool;
 static u_int			in_multientries;
-struct in_multihashhead *	in_multihashtbl;
+static struct in_multihashhead *in_multihashtbl;
+static u_long			in_multihash;
+static krwlock_t		in_multilock;
+
+#define IN_MULTI_HASH(x, ifp) \
+    (in_multihashtbl[(u_long)((x) ^ (ifp->if_index)) % IN_MULTI_HASH_SIZE])
 
-u_long				in_multihash;
 struct in_ifaddrhashhead *	in_ifaddrhashtbl;
 u_long				in_ifaddrhash;
 struct in_ifaddrhead		in_ifaddrhead;
@@ -179,6 +189,7 @@ in_init(void)
 	    &in_ifaddrhash);
 	in_multihashtbl = hashinit(IN_IFADDR_HASH_SIZE, HASH_LIST, true,
 	    &in_multihash);
+	rw_init(&in_multilock);
 
 	in_sysctl_init(NULL);
 }
@@ -1067,64 +1078,106 @@ in_broadcast(struct in_addr in, struct i
 }
 
 /*
+ * in_lookup_multi: look up the in_multi record for a given IP
+ * multicast address on a given interface.  If no matching record is
+ * found, return NULL.
+ */
+struct in_multi *
+in_lookup_multi(struct in_addr addr, ifnet_t *ifp)
+{
+	struct in_multi *inm;
+
+	KASSERT(rw_lock_held(&in_multilock));
+
+	LIST_FOREACH(inm, &IN_MULTI_HASH(addr.s_addr, ifp), inm_list) {
+		if (in_hosteq(inm->inm_addr, addr) && inm->inm_ifp == ifp)
+			break;
+	}
+	return inm;
+}
+
+/*
+ * in_multi_group: check whether the address belongs to an IP multicast
+ * group we have joined on this interface.  Returns true or false.
+ */
+bool
+in_multi_group(struct in_addr addr, ifnet_t *ifp, int flags)
+{
+	bool ingroup;
+
+	if (__predict_true(flags & IP_IGMP_MCAST) == 0) {
+		rw_enter(&in_multilock, RW_READER);
+		ingroup = in_lookup_multi(addr, ifp) != NULL;
+		rw_exit(&in_multilock);
+	} else {
+		/* XXX Recursive call from ip_output(). */
+		KASSERT(rw_lock_held(&in_multilock));
+		ingroup = in_lookup_multi(addr, ifp) != NULL;
+	}
+	return ingroup;
+}
+
+/*
  * Add an address to the list of IP multicast addresses for a given interface.
  */
 struct in_multi *
-in_addmulti(struct in_addr *ap, struct ifnet *ifp)
+in_addmulti(struct in_addr *ap, ifnet_t *ifp)
 {
 	struct sockaddr_in sin;
 	struct in_multi *inm;
-	int s = splsoftnet();
 
 	/*
 	 * See if address already in list.
 	 */
-	IN_LOOKUP_MULTI(*ap, ifp, inm);
+	rw_enter(&in_multilock, RW_WRITER);
+	inm = in_lookup_multi(*ap, ifp);
 	if (inm != NULL) {
 		/*
 		 * Found it; just increment the reference count.
 		 */
-		++inm->inm_refcount;
-	} else {
-		/*
-		 * New address; allocate a new multicast record
-		 * and link it into the interface's multicast list.
-		 */
-		inm = pool_get(&inmulti_pool, PR_NOWAIT);
-		if (inm == NULL) {
-			splx(s);
-			return (NULL);
-		}
-		inm->inm_addr = *ap;
-		inm->inm_ifp = ifp;
-		inm->inm_refcount = 1;
-		LIST_INSERT_HEAD(
-		    &IN_MULTI_HASH(inm->inm_addr.s_addr, ifp),
-		    inm, inm_list);
-		/*
-		 * Ask the network driver to update its multicast reception
-		 * filter appropriately for the new address.
-		 */
-		sockaddr_in_init(&sin, ap, 0);
-		if (if_mcast_op(ifp, SIOCADDMULTI, sintosa(&sin)) != 0) {
-			LIST_REMOVE(inm, inm_list);
-			pool_put(&inmulti_pool, inm);
-			splx(s);
-			return (NULL);
-		}
-		/*
-		 * Let IGMP know that we have joined a new IP multicast group.
-		 */
-		if (igmp_joingroup(inm) != 0) {
-			LIST_REMOVE(inm, inm_list);
-			pool_put(&inmulti_pool, inm);
-			splx(s);
-			return (NULL);
-		}
-		in_multientries++;
+		inm->inm_refcount++;
+		rw_exit(&in_multilock);
+		return inm;
 	}
-	splx(s);
-	return (inm);
+
+	/*
+	 * New address; allocate a new multicast record.
+	 */
+	inm = pool_get(&inmulti_pool, PR_NOWAIT);
+	if (inm == NULL) {
+		rw_exit(&in_multilock);
+		return NULL;
+	}
+	inm->inm_addr = *ap;
+	inm->inm_ifp = ifp;
+	inm->inm_refcount = 1;
+
+	/*
+	 * Ask the network driver to update its multicast reception
+	 * filter appropriately for the new address.
+	 */
+	sockaddr_in_init(&sin, ap, 0);
+	if (if_mcast_op(ifp, SIOCADDMULTI, sintosa(&sin)) != 0) {
+		rw_exit(&in_multilock);
+		pool_put(&inmulti_pool, inm);
+		return NULL;
+	}
+
+	/*
+	 * Let IGMP know that we have joined a new IP multicast group.
+	 */
+	if (igmp_joingroup(inm) != 0) {
+		rw_exit(&in_multilock);
+		pool_put(&inmulti_pool, inm);
+		return NULL;
+	}
+	LIST_INSERT_HEAD(
+	    &IN_MULTI_HASH(inm->inm_addr.s_addr, ifp),
+	    inm, inm_list);
+	in_multientries++;
+	rw_exit(&in_multilock);
+
+	return inm;
 }
 
 /*
@@ -1134,28 +1187,85 @@ void
 in_delmulti(struct in_multi *inm)
 {
 	struct sockaddr_in sin;
-	int s = splsoftnet();
 
-	if (--inm->inm_refcount == 0) {
-		/*
-		 * No remaining claims to this record; let IGMP know that
-		 * we are leaving the multicast group.
-		 */
-		igmp_leavegroup(inm);
-		/*
-		 * Unlink from list.
-		 */
-		LIST_REMOVE(inm, inm_list);
-		in_multientries--;
-		/*
-		 * Notify the network driver to update its multicast reception
-		 * filter.
-		 */
-		sockaddr_in_init(&sin, &inm->inm_addr, 0);
-		if_mcast_op(inm->inm_ifp, SIOCDELMULTI, sintosa(&sin));
-		pool_put(&inmulti_pool, inm);
+	rw_enter(&in_multilock, RW_WRITER);
+	if (--inm->inm_refcount > 0) {
+		rw_exit(&in_multilock);
+		return;
 	}
-	splx(s);
+
+	/*
+	 * No remaining claims to this record; let IGMP know that
+	 * we are leaving the multicast group.
+	 */
+	igmp_leavegroup(inm);
+
+	/*
+	 * Notify the network driver to update its multicast reception
+	 * filter.
+	 */
+	sockaddr_in_init(&sin, &inm->inm_addr, 0);
+	if_mcast_op(inm->inm_ifp, SIOCDELMULTI, sintosa(&sin));
+
+	/*
+	 * Unlink from list.
+	 */
+	LIST_REMOVE(inm, inm_list);
+	in_multientries--;
+	rw_exit(&in_multilock);
+
+	pool_put(&inmulti_pool, inm);
+}
+
+/*
+ * in_next_multi: step through all of the in_multi records, one at a time.
+ * The current position is remembered in "step", which the caller must
+ * provide.  in_first_multi(), below, must be called to initialize "step"
+ * and get the first record.  Both functions return NULL when there
+ * are no remaining records.
+ */
+struct in_multi *
+in_next_multi(struct in_multistep *step)
+{
+	struct in_multi *inm;
+
+	KASSERT(rw_lock_held(&in_multilock));
+
+	while (step->i_inm == NULL && step->i_n < IN_MULTI_HASH_SIZE) {
+		step->i_inm = LIST_FIRST(&in_multihashtbl[++step->i_n]);
+	}
+	if ((inm = step->i_inm) != NULL) {
+		step->i_inm = LIST_NEXT(inm, inm_list);
+	}
+	return inm;
+}
+
+struct in_multi *
+in_first_multi(struct in_multistep *step)
+{
+	KASSERT(rw_lock_held(&in_multilock));
+
+	step->i_n = 0;
+	step->i_inm = LIST_FIRST(&in_multihashtbl[0]);
+	return in_next_multi(step);
+}
+
+void
+in_multi_lock(int op)
+{
+	rw_enter(&in_multilock, op);
+}
+
+void
+in_multi_unlock(void)
+{
+	rw_exit(&in_multilock);
+}
+
+int
+in_multi_lock_held(void)
+{
+	return rw_lock_held(&in_multilock);
 }
 
 struct sockaddr_in *

Index: src/sys/netinet/in_var.h
diff -u src/sys/netinet/in_var.h:1.67 src/sys/netinet/in_var.h:1.68
--- src/sys/netinet/in_var.h:1.67	Fri May 23 19:27:48 2014
+++ src/sys/netinet/in_var.h	Thu May 29 23:02:48 2014
@@ -1,4 +1,4 @@
-/*	$NetBSD: in_var.h,v 1.67 2014/05/23 19:27:48 rmind Exp $	*/
+/*	$NetBSD: in_var.h,v 1.68 2014/05/29 23:02:48 rmind Exp $	*/
 
 /*-
  * Copyright (c) 1998 The NetBSD Foundation, Inc.
@@ -107,13 +107,11 @@ struct	in_aliasreq {
  */
 #define	IA_SIN(ia) (&(((struct in_ifaddr *)(ia))->ia_addr))
 
+#ifdef _KERNEL
 
-#ifdef	_KERNEL
+/* Note: 61, 127, 251, 509, 1021, 2039 are good. */
 #ifndef IN_IFADDR_HASH_SIZE
-#define IN_IFADDR_HASH_SIZE 509	/* 61, 127, 251, 509, 1021, 2039 are good */
-#endif
-#ifndef IN_MULTI_HASH_SIZE
-#define IN_MULTI_HASH_SIZE 509	/* 61, 127, 251, 509, 1021, 2039 are good */
+#define IN_IFADDR_HASH_SIZE	509
 #endif
 
 /*
@@ -123,20 +121,14 @@ struct	in_aliasreq {
  */
 
 #define	IN_IFADDR_HASH(x) in_ifaddrhashtbl[(u_long)(x) % IN_IFADDR_HASH_SIZE]
-#define IN_MULTI_HASH(x, ifp) \
-	(in_multihashtbl[(u_long)((x) ^ (ifp->if_index)) % IN_MULTI_HASH_SIZE])
 
 LIST_HEAD(in_ifaddrhashhead, in_ifaddr);	/* Type of the hash head */
 TAILQ_HEAD(in_ifaddrhead, in_ifaddr);		/* Type of the list head */
-LIST_HEAD(in_multihashhead, in_multi);		/* Type of the hash head */
 
 extern	u_long in_ifaddrhash;			/* size of hash table - 1 */
 extern  struct in_ifaddrhashhead *in_ifaddrhashtbl;	/* Hash table head */
 extern  struct in_ifaddrhead in_ifaddrhead;		/* List head (in ip_input) */
 
-extern	u_long in_multihash;			/* size of hash table - 1 */
-extern  struct in_multihashhead *in_multihashtbl;	/* Hash table head */
-
 extern	struct	ifqueue	ipintrq;		/* ip packet input queue */
 extern	const	int	inetctlerrmap[];
 
@@ -203,21 +195,13 @@ extern	const	int	inetctlerrmap[];
 #endif
 
 /*
- * Per-interface router version information.
- */
-struct router_info {
-	LIST_ENTRY(router_info) rti_link;
-	struct	ifnet *rti_ifp;
-	int	rti_type;	/* type of router on this interface */
-	int	rti_age;	/* time since last v1 query */
-};
-
-/*
  * Internet multicast address structure.  There is one of these for each IP
  * multicast group to which this host belongs on a given network interface.
  * They are kept in a linked list, rooted in the interface's in_ifaddr
  * structure.
  */
+struct router_info;
+
 struct in_multi {
 	LIST_ENTRY(in_multi) inm_list;	/* list of multicast addresses */
 	struct	router_info *inm_rti;	/* router version info */
@@ -230,55 +214,24 @@ struct in_multi {
 
 #ifdef _KERNEL
 /*
- * Structure used by macros below to remember position when stepping through
- * all of the in_multi records.
+ * Structure used by functions below to remember position when stepping
+ * through all of the in_multi records.
  */
 struct in_multistep {
 	int i_n;
 	struct in_multi *i_inm;
 };
 
-/*
- * Macro for looking up the in_multi record for a given IP multicast address
- * on a given interface.  If no matching record is found, "inm" returns NULL.
- */
-#define IN_LOOKUP_MULTI(addr, ifp, inm) \
-	/* struct in_addr addr; */ \
-	/* struct ifnet *ifp; */ \
-	/* struct in_multi *inm; */ \
-{ \
-	LIST_FOREACH((inm), &IN_MULTI_HASH(((addr).s_addr), (ifp)), inm_list) {\
-		if (in_hosteq((inm)->inm_addr, (addr)) && \
-		    (inm)->inm_ifp == (ifp)) \
-			break; \
-	} \
-}
-
-/*
- * Macro to step through all of the in_multi records, one at a time.
- * The current position is remembered in "step", which the caller must
- * provide.  IN_FIRST_MULTI(), below, must be called to initialize "step"
- * and get the first record.  Both macros return a NULL "inm" when there
- * are no remaining records.
- */
-#define IN_NEXT_MULTI(step, inm) \
-	/* struct in_multistep  step; */ \
-	/* struct in_multi *inm; */ \
-{ \
-	while ((step).i_inm == NULL && (step).i_n < IN_MULTI_HASH_SIZE) \
-		(step).i_inm = LIST_FIRST(&in_multihashtbl[++(step).i_n]); \
-	if (((inm) = (step).i_inm) != NULL) \
-		(step).i_inm = LIST_NEXT((inm), inm_list); \
-}
-
-#define IN_FIRST_MULTI(step, inm) \
-	/* struct in_multistep step; */ \
-	/* struct in_multi *inm; */ \
-{ \
-	(step).i_n = 0; \
-	(step).i_inm = LIST_FIRST(&in_multihashtbl[0]); \
-	IN_NEXT_MULTI((step), (inm)); \
-}
+bool in_multi_group(struct in_addr, struct ifnet *, int);
+struct in_multi *in_first_multi(struct in_multistep *);
+struct in_multi *in_next_multi(struct in_multistep *);
+struct in_multi *in_lookup_multi(struct in_addr, struct ifnet *);
+struct in_multi *in_addmulti(struct in_addr *, struct ifnet *);
+void in_delmulti(struct in_multi *);
+
+void in_multi_lock(int);
+void in_multi_unlock(void);
+int in_multi_lock_held(void);
 
 struct ifaddr;
 
@@ -287,8 +240,6 @@ int	in_ifinit(struct ifnet *,
 void	in_savemkludge(struct in_ifaddr *);
 void	in_restoremkludge(struct in_ifaddr *, struct ifnet *);
 void	in_purgemkludge(struct ifnet *);
-struct	in_multi *in_addmulti(struct in_addr *, struct ifnet *);
-void	in_delmulti(struct in_multi *);
 void	in_ifscrub(struct ifnet *, struct in_ifaddr *);
 void	in_setmaxmtu(void);
 const char *in_fmtaddr(struct in_addr);

Index: src/sys/netinet/ip_carp.c
diff -u src/sys/netinet/ip_carp.c:1.55 src/sys/netinet/ip_carp.c:1.56
--- src/sys/netinet/ip_carp.c:1.55	Sat May 17 20:44:24 2014
+++ src/sys/netinet/ip_carp.c	Thu May 29 23:02:48 2014
@@ -1,4 +1,4 @@
-/*	$NetBSD: ip_carp.c,v 1.55 2014/05/17 20:44:24 rmind Exp $	*/
+/*	$NetBSD: ip_carp.c,v 1.56 2014/05/29 23:02:48 rmind Exp $	*/
 /*	$OpenBSD: ip_carp.c,v 1.113 2005/11/04 08:11:54 mcbride Exp $	*/
 
 /*
@@ -31,7 +31,7 @@
 #include "opt_mbuftrace.h"
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ip_carp.c,v 1.55 2014/05/17 20:44:24 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ip_carp.c,v 1.56 2014/05/29 23:02:48 rmind Exp $");
 
 /*
  * TODO:
@@ -1670,15 +1670,13 @@ carp_addr_updated(void *v)
 	/* Handle a callback after SIOCDIFADDR */
 	if (new_naddrs < sc->sc_naddrs || new_naddrs6 < sc->sc_naddrs6) {
 		struct in_addr mc_addr;
-		struct in_multi *inm;
 
 		sc->sc_naddrs = new_naddrs;
 		sc->sc_naddrs6 = new_naddrs6;
 
 		/* Re-establish multicast membership removed by in_control */
 		mc_addr.s_addr = INADDR_CARP_GROUP;
-		IN_LOOKUP_MULTI(mc_addr, &sc->sc_if, inm);
-		if (inm == NULL) {
+		if (!in_multi_group(mc_addr, &sc->sc_if, 0)) {
 			memset(&sc->sc_imo, 0, sizeof(sc->sc_imo));
 
 			if (sc->sc_carpdev != NULL && sc->sc_naddrs > 0)

Index: src/sys/netinet/ip_input.c
diff -u src/sys/netinet/ip_input.c:1.315 src/sys/netinet/ip_input.c:1.316
--- src/sys/netinet/ip_input.c:1.315	Wed May 28 19:19:33 2014
+++ src/sys/netinet/ip_input.c	Thu May 29 23:02:48 2014
@@ -1,4 +1,4 @@
-/*	$NetBSD: ip_input.c,v 1.315 2014/05/28 19:19:33 christos Exp $	*/
+/*	$NetBSD: ip_input.c,v 1.316 2014/05/29 23:02:48 rmind Exp $	*/
 
 /*
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
@@ -91,7 +91,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ip_input.c,v 1.315 2014/05/28 19:19:33 christos Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ip_input.c,v 1.316 2014/05/29 23:02:48 rmind Exp $");
 
 #include "opt_inet.h"
 #include "opt_compat_netbsd.h"
@@ -632,7 +632,6 @@ ip_input(struct mbuf *m)
 		}
 	}
 	if (IN_MULTICAST(ip->ip_dst.s_addr)) {
-		struct in_multi *inm;
 #ifdef MROUTING
 		extern struct socket *ip_mrouter;
 
@@ -669,8 +668,7 @@ ip_input(struct mbuf *m)
 		 * See if we belong to the destination multicast group on the
 		 * arrival interface.
 		 */
-		IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm);
-		if (inm == NULL) {
+		if (!in_multi_group(ip->ip_dst, ifp, 0)) {
 			IP_STATINC(IP_STAT_CANTFORWARD);
 			m_freem(m);
 			return;

Index: src/sys/netinet/ip_output.c
diff -u src/sys/netinet/ip_output.c:1.227 src/sys/netinet/ip_output.c:1.228
--- src/sys/netinet/ip_output.c:1.227	Fri May 23 00:02:14 2014
+++ src/sys/netinet/ip_output.c	Thu May 29 23:02:48 2014
@@ -1,4 +1,4 @@
-/*	$NetBSD: ip_output.c,v 1.227 2014/05/23 00:02:14 rmind Exp $	*/
+/*	$NetBSD: ip_output.c,v 1.228 2014/05/29 23:02:48 rmind Exp $	*/
 
 /*
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
@@ -91,7 +91,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ip_output.c,v 1.227 2014/05/23 00:02:14 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ip_output.c,v 1.228 2014/05/29 23:02:48 rmind Exp $");
 
 #include "opt_inet.h"
 #include "opt_ipsec.h"
@@ -278,7 +278,7 @@ ip_output(struct mbuf *m0, ...)
 
 	if (IN_MULTICAST(ip->ip_dst.s_addr) ||
 	    (ip->ip_dst.s_addr == INADDR_BROADCAST)) {
-		struct in_multi *inm;
+		bool inmgroup;
 
 		m->m_flags |= (ip->ip_dst.s_addr == INADDR_BROADCAST) ?
 			M_BCAST : M_MCAST;
@@ -331,9 +331,8 @@ ip_output(struct mbuf *m0, ...)
 			ip->ip_src = xia->ia_addr.sin_addr;
 		}
 
-		IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm);
-		if (inm != NULL &&
-		   (imo == NULL || imo->imo_multicast_loop)) {
+		inmgroup = in_multi_group(ip->ip_dst, ifp, flags);
+		if (inmgroup && (imo == NULL || imo->imo_multicast_loop)) {
 			/*
 			 * If we belong to the destination multicast group
 			 * on the outgoing interface, and the caller did not

Index: src/sys/netinet/ip_var.h
diff -u src/sys/netinet/ip_var.h:1.103 src/sys/netinet/ip_var.h:1.104
--- src/sys/netinet/ip_var.h:1.103	Fri May 23 19:35:24 2014
+++ src/sys/netinet/ip_var.h	Thu May 29 23:02:48 2014
@@ -1,4 +1,4 @@
-/*	$NetBSD: ip_var.h,v 1.103 2014/05/23 19:35:24 rmind Exp $	*/
+/*	$NetBSD: ip_var.h,v 1.104 2014/05/29 23:02:48 rmind Exp $	*/
 
 /*
  * Copyright (c) 1982, 1986, 1993
@@ -166,13 +166,20 @@ struct ip_moptions {
 #include "opt_mbuftrace.h"
 #endif
 
-/* flags passed to ip_output as last parameter */
-#define	IP_FORWARDING		0x1		/* most of ip header exists */
-#define	IP_RAWOUTPUT		0x2		/* raw ip header exists */
-#define	IP_RETURNMTU		0x4		/* pass back mtu on EMSGSIZE */
-#define	IP_NOIPNEWID		0x8		/* don't fill in ip_id */
+/*
+ * The following flags can be passed to ip_output() as the last parameter.
+ */
+#define	IP_FORWARDING		0x0001		/* most of ip header exists */
+#define	IP_RAWOUTPUT		0x0002		/* raw ip header exists */
+#define	IP_RETURNMTU		0x0004		/* pass back mtu on EMSGSIZE */
+#define	IP_NOIPNEWID		0x0008		/* don't fill in ip_id */
+
+CTASSERT(SO_DONTROUTE ==	0x0010);
+CTASSERT(SO_BROADCAST ==	0x0020);
 #define	IP_ROUTETOIF		SO_DONTROUTE	/* bypass routing tables */
 #define	IP_ALLOWBROADCAST	SO_BROADCAST	/* can send broadcast packets */
+
+#define	IP_IGMP_MCAST		0x0040		/* IGMP for mcast join/leave */
 #define	IP_MTUDISC		0x0400		/* Path MTU Discovery; set DF */
 
 extern struct domain inetdomain;

Reply via email to