Module Name:	src
Committed By:	rmind
Date:		Thu May 29 23:02:48 UTC 2014

Modified Files:
	src/sys/netinet: igmp.c igmp.h igmp_var.h in.c in_var.h ip_carp.c
	    ip_input.c ip_output.c ip_var.h

Log Message:
Make IGMP and multicast group management code MP-safe. Use a read-write
lock to protect the hash table of multicast address records; also, make it
private and eliminate some macros. In the long term, the lookup path ought
to be optimised.

To generate a diff of this commit:
cvs rdiff -u -r1.54 -r1.55 src/sys/netinet/igmp.c
cvs rdiff -u -r1.11 -r1.12 src/sys/netinet/igmp.h
cvs rdiff -u -r1.23 -r1.24 src/sys/netinet/igmp_var.h
cvs rdiff -u -r1.145 -r1.146 src/sys/netinet/in.c
cvs rdiff -u -r1.67 -r1.68 src/sys/netinet/in_var.h
cvs rdiff -u -r1.55 -r1.56 src/sys/netinet/ip_carp.c
cvs rdiff -u -r1.315 -r1.316 src/sys/netinet/ip_input.c
cvs rdiff -u -r1.227 -r1.228 src/sys/netinet/ip_output.c
cvs rdiff -u -r1.103 -r1.104 src/sys/netinet/ip_var.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files: Index: src/sys/netinet/igmp.c diff -u src/sys/netinet/igmp.c:1.54 src/sys/netinet/igmp.c:1.55 --- src/sys/netinet/igmp.c:1.54 Tue Feb 25 18:30:12 2014 +++ src/sys/netinet/igmp.c Thu May 29 23:02:48 2014 @@ -1,4 +1,4 @@ -/* $NetBSD: igmp.c,v 1.54 2014/02/25 18:30:12 pooka Exp $ */ +/* $NetBSD: igmp.c,v 1.55 2014/05/29 23:02:48 rmind Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. @@ -40,7 +40,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: igmp.c,v 1.54 2014/02/25 18:30:12 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: igmp.c,v 1.55 2014/05/29 23:02:48 rmind Exp $"); #include "opt_mrouting.h" @@ -50,6 +50,7 @@ __KERNEL_RCSID(0, "$NetBSD: igmp.c,v 1.5 #include <sys/socketvar.h> #include <sys/protosw.h> #include <sys/systm.h> +#include <sys/cprng.h> #include <sys/sysctl.h> #include <net/if.h> @@ -64,83 +65,107 @@ __KERNEL_RCSID(0, "$NetBSD: igmp.c,v 1.5 #include <netinet/igmp.h> #include <netinet/igmp_var.h> -#define IP_MULTICASTOPTS 0 - -static struct pool igmp_rti_pool; +/* + * Per-interface router version information. + */ +typedef struct router_info { + LIST_ENTRY(router_info) rti_link; + ifnet_t * rti_ifp; + int rti_type; /* type of router on this interface */ + int rti_age; /* time since last v1 query */ +} router_info_t; -static percpu_t *igmpstat_percpu; +/* + * The router-info list and the timer flag are protected by in_multilock. 
+ * + * Lock order: + * + * softnet_lock -> + * in_multilock + */ +static struct pool igmp_rti_pool __cacheline_aligned; +static LIST_HEAD(, router_info) rti_head __cacheline_aligned; +static int igmp_timers_on __cacheline_aligned; +static percpu_t * igmpstat_percpu __read_mostly; #define IGMP_STATINC(x) _NET_STATINC(igmpstat_percpu, x) -int igmp_timers_are_running; -static LIST_HEAD(, router_info) rti_head = LIST_HEAD_INITIALIZER(rti_head); - -void igmp_sendpkt(struct in_multi *, int); -static int rti_fill(struct in_multi *); -static struct router_info *rti_find(struct ifnet *); -static void rti_delete(struct ifnet *); - -static void sysctl_net_inet_igmp_setup(struct sysctllog **); +static void igmp_sendpkt(struct in_multi *, int); +static int rti_fill(struct in_multi *); +static router_info_t * rti_find(struct ifnet *); +static void rti_delete(struct ifnet *); +static void sysctl_net_inet_igmp_setup(struct sysctllog **); +/* + * rti_fill: associate router information with the given multicast group; + * if there is no router information for the interface, then create it. + */ static int rti_fill(struct in_multi *inm) { - struct router_info *rti; + router_info_t *rti; + + KASSERT(in_multi_lock_held()); - /* this function is called at splsoftnet() */ LIST_FOREACH(rti, &rti_head, rti_link) { if (rti->rti_ifp == inm->inm_ifp) { inm->inm_rti = rti; - if (rti->rti_type == IGMP_v1_ROUTER) - return (IGMP_v1_HOST_MEMBERSHIP_REPORT); - else - return (IGMP_v2_HOST_MEMBERSHIP_REPORT); + return rti->rti_type == IGMP_v1_ROUTER ? 
+ IGMP_v1_HOST_MEMBERSHIP_REPORT : + IGMP_v2_HOST_MEMBERSHIP_REPORT; } } - rti = pool_get(&igmp_rti_pool, PR_NOWAIT); - if (rti == NULL) + if (rti == NULL) { return 0; + } rti->rti_ifp = inm->inm_ifp; rti->rti_type = IGMP_v2_ROUTER; LIST_INSERT_HEAD(&rti_head, rti, rti_link); inm->inm_rti = rti; - return (IGMP_v2_HOST_MEMBERSHIP_REPORT); + return IGMP_v2_HOST_MEMBERSHIP_REPORT; } -static struct router_info * -rti_find(struct ifnet *ifp) +/* + * rti_find: lookup or create router information for the given interface. + */ +static router_info_t * +rti_find(ifnet_t *ifp) { - struct router_info *rti; - int s = splsoftnet(); + router_info_t *rti; + + KASSERT(in_multi_lock_held()); LIST_FOREACH(rti, &rti_head, rti_link) { if (rti->rti_ifp == ifp) - return (rti); + return rti; } - rti = pool_get(&igmp_rti_pool, PR_NOWAIT); if (rti == NULL) { - splx(s); return NULL; } rti->rti_ifp = ifp; rti->rti_type = IGMP_v2_ROUTER; LIST_INSERT_HEAD(&rti_head, rti, rti_link); - splx(s); - return (rti); + return rti; } +/* + * rti_delete: remove and free the router information entry for the + * given interface. + */ static void -rti_delete(struct ifnet *ifp) /* MUST be called at splsoftnet */ +rti_delete(ifnet_t *ifp) { - struct router_info *rti; + router_info_t *rti; + + KASSERT(in_multi_lock_held()); LIST_FOREACH(rti, &rti_head, rti_link) { if (rti->rti_ifp == ifp) { LIST_REMOVE(rti, rti_link); pool_put(&igmp_rti_pool, rti); - return; + break; } } } @@ -148,29 +173,24 @@ rti_delete(struct ifnet *ifp) /* MUST be void igmp_init(void) { - - sysctl_net_inet_igmp_setup(NULL); - pool_init(&igmp_rti_pool, sizeof(struct router_info), 0, 0, 0, + pool_init(&igmp_rti_pool, sizeof(router_info_t), 0, 0, 0, "igmppl", NULL, IPL_SOFTNET); igmpstat_percpu = percpu_alloc(sizeof(uint64_t) * IGMP_NSTATS); + sysctl_net_inet_igmp_setup(NULL); + LIST_INIT(&rti_head); } void igmp_input(struct mbuf *m, ...) 
{ - int proto; - int iphlen; - struct ifnet *ifp = m->m_pkthdr.rcvif; + ifnet_t *ifp = m->m_pkthdr.rcvif; struct ip *ip = mtod(m, struct ip *); struct igmp *igmp; - u_int minlen; + u_int minlen, timer; struct in_multi *inm; - struct in_multistep step; - struct router_info *rti; struct in_ifaddr *ia; - u_int timer; + int proto, ip_len, iphlen; va_list ap; - u_int16_t ip_len; va_start(ap, m); iphlen = va_arg(ap, int); @@ -222,11 +242,8 @@ igmp_input(struct mbuf *m, ...) break; if (igmp->igmp_code == 0) { - rti = rti_find(ifp); - if (rti == NULL) - break; - rti->rti_type = IGMP_v1_ROUTER; - rti->rti_age = 0; + struct in_multistep step; + router_info_t *rti; if (ip->ip_dst.s_addr != INADDR_ALLHOSTS_GROUP) { IGMP_STATINC(IGMP_STAT_RCV_BADQUERIES); @@ -234,13 +251,23 @@ igmp_input(struct mbuf *m, ...) return; } + in_multi_lock(RW_WRITER); + rti = rti_find(ifp); + if (rti == NULL) { + in_multi_unlock(); + break; + } + rti->rti_type = IGMP_v1_ROUTER; + rti->rti_age = 0; + /* * Start the timers in all of our membership records * for the interface on which the query arrived, * except those that are already running and those * that belong to a "local" group (224.0.0.X). */ - IN_FIRST_MULTI(step, inm); + + inm = in_first_multi(&step); while (inm != NULL) { if (inm->inm_ifp == ifp && inm->inm_timer == 0 && @@ -248,11 +275,14 @@ igmp_input(struct mbuf *m, ...) inm->inm_state = IGMP_DELAYING_MEMBER; inm->inm_timer = IGMP_RANDOM_DELAY( IGMP_MAX_HOST_REPORT_DELAY * PR_FASTHZ); - igmp_timers_are_running = 1; + igmp_timers_on = true; } - IN_NEXT_MULTI(step, inm); + inm = in_next_multi(&step); } + in_multi_unlock(); } else { + struct in_multistep step; + if (!IN_MULTICAST(ip->ip_dst.s_addr)) { IGMP_STATINC(IGMP_STAT_RCV_BADQUERIES); m_freem(m); @@ -261,7 +291,7 @@ igmp_input(struct mbuf *m, ...) 
timer = igmp->igmp_code * PR_FASTHZ / IGMP_TIMER_SCALE; if (timer == 0) - timer =1; + timer = 1; /* * Start the timers in all of our membership records @@ -271,7 +301,8 @@ igmp_input(struct mbuf *m, ...) * timers already running, check if they need to be * reset. */ - IN_FIRST_MULTI(step, inm); + in_multi_lock(RW_WRITER); + inm = in_first_multi(&step); while (inm != NULL) { if (inm->inm_ifp == ifp && !IN_LOCAL_GROUP(inm->inm_addr.s_addr) && @@ -289,7 +320,7 @@ igmp_input(struct mbuf *m, ...) IGMP_DELAYING_MEMBER; inm->inm_timer = IGMP_RANDOM_DELAY(timer); - igmp_timers_are_running = 1; + igmp_timers_on = true; break; case IGMP_SLEEPING_MEMBER: inm->inm_state = @@ -297,8 +328,9 @@ igmp_input(struct mbuf *m, ...) break; } } - IN_NEXT_MULTI(step, inm); + inm = in_next_multi(&step); } + in_multi_unlock(); } break; @@ -335,7 +367,8 @@ igmp_input(struct mbuf *m, ...) * If we belong to the group being reported, stop * our timer for that group. */ - IN_LOOKUP_MULTI(igmp->igmp_group, ifp, inm); + in_multi_lock(RW_WRITER); + inm = in_lookup_multi(igmp->igmp_group, ifp); if (inm != NULL) { inm->inm_timer = 0; IGMP_STATINC(IGMP_STAT_RCV_OURREPORTS); @@ -355,7 +388,7 @@ igmp_input(struct mbuf *m, ...) break; } } - + in_multi_unlock(); break; case IGMP_v2_HOST_MEMBERSHIP_REPORT: @@ -403,7 +436,8 @@ igmp_input(struct mbuf *m, ...) * If we belong to the group being reported, stop * our timer for that group. */ - IN_LOOKUP_MULTI(igmp->igmp_group, ifp, inm); + in_multi_lock(RW_WRITER); + inm = in_lookup_multi(igmp->igmp_group, ifp); if (inm != NULL) { inm->inm_timer = 0; IGMP_STATINC(IGMP_STAT_RCV_OURREPORTS); @@ -419,7 +453,7 @@ igmp_input(struct mbuf *m, ...) break; } } - + in_multi_unlock(); break; } @@ -435,32 +469,32 @@ igmp_input(struct mbuf *m, ...) 
int igmp_joingroup(struct in_multi *inm) { - int report_type; - int s = splsoftnet(); - + KASSERT(in_multi_lock_held()); inm->inm_state = IGMP_IDLE_MEMBER; if (!IN_LOCAL_GROUP(inm->inm_addr.s_addr) && (inm->inm_ifp->if_flags & IFF_LOOPBACK) == 0) { + int report_type; + report_type = rti_fill(inm); if (report_type == 0) { - splx(s); return ENOMEM; } igmp_sendpkt(inm, report_type); inm->inm_state = IGMP_DELAYING_MEMBER; inm->inm_timer = IGMP_RANDOM_DELAY( IGMP_MAX_HOST_REPORT_DELAY * PR_FASTHZ); - igmp_timers_are_running = 1; + igmp_timers_on = true; } else inm->inm_timer = 0; - splx(s); + return 0; } void igmp_leavegroup(struct in_multi *inm) { + KASSERT(in_multi_lock_held()); switch (inm->inm_state) { case IGMP_DELAYING_MEMBER: @@ -487,14 +521,16 @@ igmp_fasttimo(void) * Quick check to see if any work needs to be done, in order * to minimize the overhead of fasttimo processing. */ - if (!igmp_timers_are_running) + if (!igmp_timers_on) { return; + } + /* XXX: Needed for ip_output(). */ mutex_enter(softnet_lock); - KERNEL_LOCK(1, NULL); - igmp_timers_are_running = 0; - IN_FIRST_MULTI(step, inm); + in_multi_lock(RW_WRITER); + igmp_timers_on = false; + inm = in_first_multi(&step); while (inm != NULL) { if (inm->inm_timer == 0) { /* do nothing */ @@ -509,46 +545,47 @@ igmp_fasttimo(void) inm->inm_state = IGMP_IDLE_MEMBER; } } else { - igmp_timers_are_running = 1; + igmp_timers_on = true; } - IN_NEXT_MULTI(step, inm); + inm = in_next_multi(&step); } - - KERNEL_UNLOCK_ONE(NULL); + in_multi_unlock(); mutex_exit(softnet_lock); } void igmp_slowtimo(void) { - struct router_info *rti; + router_info_t *rti; - mutex_enter(softnet_lock); - KERNEL_LOCK(1, NULL); + in_multi_lock(RW_WRITER); LIST_FOREACH(rti, &rti_head, rti_link) { if (rti->rti_type == IGMP_v1_ROUTER && ++rti->rti_age >= IGMP_AGE_THRESHOLD) { rti->rti_type = IGMP_v2_ROUTER; } } - KERNEL_UNLOCK_ONE(NULL); - mutex_exit(softnet_lock); + in_multi_unlock(); } -void +/* + * igmp_sendpkt: construct an IGMP packet, given 
the multicast structure + * and the type, and send the datagram. + */ +static void igmp_sendpkt(struct in_multi *inm, int type) { struct mbuf *m; struct igmp *igmp; struct ip *ip; struct ip_moptions imo; -#ifdef MROUTING - extern struct socket *ip_mrouter; -#endif /* MROUTING */ + + KASSERT(in_multi_lock_held()); MGETHDR(m, M_DONTWAIT, MT_HEADER); if (m == NULL) return; + /* * Assume max_linkhdr + sizeof(struct ip) + IGMP_MINLEN * is smaller than mbuf size returned by MGETHDR. @@ -586,33 +623,38 @@ igmp_sendpkt(struct in_multi *inm, int t * router, so that the process-level routing demon can hear it. */ #ifdef MROUTING + extern struct socket *ip_mrouter; imo.imo_multicast_loop = (ip_mrouter != NULL); #else imo.imo_multicast_loop = 0; -#endif /* MROUTING */ - - ip_output(m, NULL, NULL, IP_MULTICASTOPTS, &imo, NULL); +#endif + /* + * Note: IP_IGMP_MCAST indicates that in_multilock is held. + * The caller must still acquire softnet_lock for ip_output(). + */ + KASSERT(mutex_owned(softnet_lock)); + ip_output(m, NULL, NULL, IP_IGMP_MCAST, &imo, NULL); IGMP_STATINC(IGMP_STAT_SND_REPORTS); } void -igmp_purgeif(struct ifnet *ifp) /* MUST be called at splsoftnet() */ +igmp_purgeif(ifnet_t *ifp) { - rti_delete(ifp); /* manipulates pools */ + in_multi_lock(RW_WRITER); + rti_delete(ifp); + in_multi_unlock(); } static int sysctl_net_inet_igmp_stats(SYSCTLFN_ARGS) { - - return (NETSTAT_SYSCTL(igmpstat_percpu, IGMP_NSTATS)); + return NETSTAT_SYSCTL(igmpstat_percpu, IGMP_NSTATS); } static void sysctl_net_inet_igmp_setup(struct sysctllog **clog) { - sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT, CTLTYPE_NODE, "inet", NULL, @@ -624,7 +666,6 @@ sysctl_net_inet_igmp_setup(struct sysctl SYSCTL_DESCR("Internet Group Management Protocol"), NULL, 0, NULL, 0, CTL_NET, PF_INET, IPPROTO_IGMP, CTL_EOL); - sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT, CTLTYPE_STRUCT, "stats", Index: src/sys/netinet/igmp.h diff -u src/sys/netinet/igmp.h:1.11 src/sys/netinet/igmp.h:1.12 --- 
src/sys/netinet/igmp.h:1.11 Tue Dec 25 18:33:46 2007 +++ src/sys/netinet/igmp.h Thu May 29 23:02:48 2014 @@ -1,4 +1,4 @@ -/* $NetBSD: igmp.h,v 1.11 2007/12/25 18:33:46 perry Exp $ */ +/* $NetBSD: igmp.h,v 1.12 2014/05/29 23:02:48 rmind Exp $ */ /* * Copyright (c) 1992, 1993 @@ -84,13 +84,13 @@ * IGMP packet format. */ struct igmp { - u_int8_t igmp_type; /* version & type of IGMP message */ - u_int8_t igmp_code; /* code for routing sub-messages */ - u_int16_t igmp_cksum; /* IP-style checksum */ + uint8_t igmp_type; /* version & type of IGMP message */ + uint8_t igmp_code; /* code for routing sub-messages */ + uint16_t igmp_cksum; /* IP-style checksum */ struct in_addr igmp_group; /* group address being reported */ } __packed; /* (zero for queries) */ -#define IGMP_MINLEN 8 +#define IGMP_MINLEN 8 #define IGMP_HOST_MEMBERSHIP_QUERY 0x11 /* membership query */ #define IGMP_v1_HOST_MEMBERSHIP_REPORT 0x12 /* v1 membership report */ Index: src/sys/netinet/igmp_var.h diff -u src/sys/netinet/igmp_var.h:1.23 src/sys/netinet/igmp_var.h:1.24 --- src/sys/netinet/igmp_var.h:1.23 Tue Apr 15 16:02:03 2008 +++ src/sys/netinet/igmp_var.h Thu May 29 23:02:48 2014 @@ -1,4 +1,4 @@ -/* $NetBSD: igmp_var.h,v 1.23 2008/04/15 16:02:03 thorpej Exp $ */ +/* $NetBSD: igmp_var.h,v 1.24 2014/05/29 23:02:48 rmind Exp $ */ /* * Copyright (c) 1992, 1993 @@ -103,7 +103,7 @@ * DELAY * countdown frequency). We assume that the routine random() * is defined somewhere (and that it returns a positive number). 
*/ -#define IGMP_RANDOM_DELAY(X) (random() % (X) + 1) +#define IGMP_RANDOM_DELAY(X) (cprng_fast32() % (X) + 1) #ifdef __NO_STRICT_ALIGNMENT #define IGMP_HDR_ALIGNED_P(ig) 1 Index: src/sys/netinet/in.c diff -u src/sys/netinet/in.c:1.145 src/sys/netinet/in.c:1.146 --- src/sys/netinet/in.c:1.145 Thu May 22 22:01:12 2014 +++ src/sys/netinet/in.c Thu May 29 23:02:48 2014 @@ -1,4 +1,4 @@ -/* $NetBSD: in.c,v 1.145 2014/05/22 22:01:12 rmind Exp $ */ +/* $NetBSD: in.c,v 1.146 2014/05/29 23:02:48 rmind Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. @@ -91,7 +91,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: in.c,v 1.145 2014/05/22 22:01:12 rmind Exp $"); +__KERNEL_RCSID(0, "$NetBSD: in.c,v 1.146 2014/05/29 23:02:48 rmind Exp $"); #include "opt_inet.h" #include "opt_inet_conf.h" @@ -149,6 +149,11 @@ static void in_sysctl_init(struct sysctl #define HOSTZEROBROADCAST 1 #endif +/* Note: 61, 127, 251, 509, 1021, 2039 are good. */ +#ifndef IN_MULTI_HASH_SIZE +#define IN_MULTI_HASH_SIZE 509 +#endif + static int subnetsarelocal = SUBNETSARELOCAL; static int hostzeroisbroadcast = HOSTZEROBROADCAST; @@ -157,13 +162,18 @@ static int hostzeroisbroadcast = HOSTZ * deleted interface addresses. We use in_ifaddr so that a chain head * won't be deallocated until all multicast address record are deleted. 
*/ -static TAILQ_HEAD(, in_ifaddr) in_mk = TAILQ_HEAD_INITIALIZER(in_mk); + +LIST_HEAD(in_multihashhead, in_multi); /* Type of the hash head */ static struct pool inmulti_pool; static u_int in_multientries; -struct in_multihashhead * in_multihashtbl; +static struct in_multihashhead *in_multihashtbl; +static u_long in_multihash; +static krwlock_t in_multilock; + +#define IN_MULTI_HASH(x, ifp) \ + (in_multihashtbl[(u_long)((x) ^ (ifp->if_index)) % IN_MULTI_HASH_SIZE]) -u_long in_multihash; struct in_ifaddrhashhead * in_ifaddrhashtbl; u_long in_ifaddrhash; struct in_ifaddrhead in_ifaddrhead; @@ -179,6 +189,7 @@ in_init(void) &in_ifaddrhash); in_multihashtbl = hashinit(IN_IFADDR_HASH_SIZE, HASH_LIST, true, &in_multihash); + rw_init(&in_multilock); in_sysctl_init(NULL); } @@ -1067,64 +1078,106 @@ in_broadcast(struct in_addr in, struct i } /* + * in_lookup_multi: look up the in_multi record for a given IP + * multicast address on a given interface. If no matching record is + * found, return NULL. + */ +struct in_multi * +in_lookup_multi(struct in_addr addr, ifnet_t *ifp) +{ + struct in_multi *inm; + + KASSERT(rw_lock_held(&in_multilock)); + + LIST_FOREACH(inm, &IN_MULTI_HASH(addr.s_addr, ifp), inm_list) { + if (in_hosteq(inm->inm_addr, addr) && inm->inm_ifp == ifp) + break; + } + return inm; +} + +/* + * in_multi_group: check whether the address belongs to an IP multicast + * group we are joined on this interface. Returns true or false. + */ +bool +in_multi_group(struct in_addr addr, ifnet_t *ifp, int flags) +{ + bool ingroup; + + if (__predict_true(flags & IP_IGMP_MCAST) == 0) { + rw_enter(&in_multilock, RW_READER); + ingroup = in_lookup_multi(addr, ifp) != NULL; + rw_exit(&in_multilock); + } else { + /* XXX Recursive call from ip_output(). */ + KASSERT(rw_lock_held(&in_multilock)); + ingroup = in_lookup_multi(addr, ifp) != NULL; + } + return ingroup; +} + +/* * Add an address to the list of IP multicast addresses for a given interface. 
*/ struct in_multi * -in_addmulti(struct in_addr *ap, struct ifnet *ifp) +in_addmulti(struct in_addr *ap, ifnet_t *ifp) { struct sockaddr_in sin; struct in_multi *inm; - int s = splsoftnet(); /* * See if address already in list. */ - IN_LOOKUP_MULTI(*ap, ifp, inm); + rw_enter(&in_multilock, RW_WRITER); + inm = in_lookup_multi(*ap, ifp); if (inm != NULL) { /* * Found it; just increment the reference count. */ - ++inm->inm_refcount; - } else { - /* - * New address; allocate a new multicast record - * and link it into the interface's multicast list. - */ - inm = pool_get(&inmulti_pool, PR_NOWAIT); - if (inm == NULL) { - splx(s); - return (NULL); - } - inm->inm_addr = *ap; - inm->inm_ifp = ifp; - inm->inm_refcount = 1; - LIST_INSERT_HEAD( - &IN_MULTI_HASH(inm->inm_addr.s_addr, ifp), - inm, inm_list); - /* - * Ask the network driver to update its multicast reception - * filter appropriately for the new address. - */ - sockaddr_in_init(&sin, ap, 0); - if (if_mcast_op(ifp, SIOCADDMULTI, sintosa(&sin)) != 0) { - LIST_REMOVE(inm, inm_list); - pool_put(&inmulti_pool, inm); - splx(s); - return (NULL); - } - /* - * Let IGMP know that we have joined a new IP multicast group. - */ - if (igmp_joingroup(inm) != 0) { - LIST_REMOVE(inm, inm_list); - pool_put(&inmulti_pool, inm); - splx(s); - return (NULL); - } - in_multientries++; + inm->inm_refcount++; + rw_exit(&in_multilock); + return inm; } - splx(s); - return (inm); + + /* + * New address; allocate a new multicast record. + */ + inm = pool_get(&inmulti_pool, PR_NOWAIT); + if (inm == NULL) { + rw_exit(&in_multilock); + return NULL; + } + inm->inm_addr = *ap; + inm->inm_ifp = ifp; + inm->inm_refcount = 1; + + /* + * Ask the network driver to update its multicast reception + * filter appropriately for the new address. 
+ */ + sockaddr_in_init(&sin, ap, 0); + if (if_mcast_op(ifp, SIOCADDMULTI, sintosa(&sin)) != 0) { + rw_exit(&in_multilock); + pool_put(&inmulti_pool, inm); + return NULL; + } + + /* + * Let IGMP know that we have joined a new IP multicast group. + */ + if (igmp_joingroup(inm) != 0) { + rw_exit(&in_multilock); + pool_put(&inmulti_pool, inm); + return NULL; + } + LIST_INSERT_HEAD( + &IN_MULTI_HASH(inm->inm_addr.s_addr, ifp), + inm, inm_list); + in_multientries++; + rw_exit(&in_multilock); + + return inm; } /* @@ -1134,28 +1187,85 @@ void in_delmulti(struct in_multi *inm) { struct sockaddr_in sin; - int s = splsoftnet(); - if (--inm->inm_refcount == 0) { - /* - * No remaining claims to this record; let IGMP know that - * we are leaving the multicast group. - */ - igmp_leavegroup(inm); - /* - * Unlink from list. - */ - LIST_REMOVE(inm, inm_list); - in_multientries--; - /* - * Notify the network driver to update its multicast reception - * filter. - */ - sockaddr_in_init(&sin, &inm->inm_addr, 0); - if_mcast_op(inm->inm_ifp, SIOCDELMULTI, sintosa(&sin)); - pool_put(&inmulti_pool, inm); + rw_enter(&in_multilock, RW_WRITER); + if (--inm->inm_refcount > 0) { + rw_exit(&in_multilock); + return; } - splx(s); + + /* + * No remaining claims to this record; let IGMP know that + * we are leaving the multicast group. + */ + igmp_leavegroup(inm); + + /* + * Notify the network driver to update its multicast reception + * filter. + */ + sockaddr_in_init(&sin, &inm->inm_addr, 0); + if_mcast_op(inm->inm_ifp, SIOCDELMULTI, sintosa(&sin)); + + /* + * Unlink from list. + */ + LIST_REMOVE(inm, inm_list); + in_multientries--; + rw_exit(&in_multilock); + + pool_put(&inmulti_pool, inm); +} + +/* + * in_next_multi: step through all of the in_multi records, one at a time. + * The current position is remembered in "step", which the caller must + * provide. in_first_multi(), below, must be called to initialize "step" + * and get the first record. 
Both macros return a NULL "inm" when there + * are no remaining records. + */ +struct in_multi * +in_next_multi(struct in_multistep *step) +{ + struct in_multi *inm; + + KASSERT(rw_lock_held(&in_multilock)); + + while (step->i_inm == NULL && step->i_n < IN_MULTI_HASH_SIZE) { + step->i_inm = LIST_FIRST(&in_multihashtbl[++step->i_n]); + } + if ((inm = step->i_inm) != NULL) { + step->i_inm = LIST_NEXT(inm, inm_list); + } + return inm; +} + +struct in_multi * +in_first_multi(struct in_multistep *step) +{ + KASSERT(rw_lock_held(&in_multilock)); + + step->i_n = 0; + step->i_inm = LIST_FIRST(&in_multihashtbl[0]); + return in_next_multi(step); +} + +void +in_multi_lock(int op) +{ + rw_enter(&in_multilock, op); +} + +void +in_multi_unlock(void) +{ + rw_exit(&in_multilock); +} + +int +in_multi_lock_held(void) +{ + return rw_lock_held(&in_multilock); } struct sockaddr_in * Index: src/sys/netinet/in_var.h diff -u src/sys/netinet/in_var.h:1.67 src/sys/netinet/in_var.h:1.68 --- src/sys/netinet/in_var.h:1.67 Fri May 23 19:27:48 2014 +++ src/sys/netinet/in_var.h Thu May 29 23:02:48 2014 @@ -1,4 +1,4 @@ -/* $NetBSD: in_var.h,v 1.67 2014/05/23 19:27:48 rmind Exp $ */ +/* $NetBSD: in_var.h,v 1.68 2014/05/29 23:02:48 rmind Exp $ */ /*- * Copyright (c) 1998 The NetBSD Foundation, Inc. @@ -107,13 +107,11 @@ struct in_aliasreq { */ #define IA_SIN(ia) (&(((struct in_ifaddr *)(ia))->ia_addr)) +#ifdef _KERNEL -#ifdef _KERNEL +/* Note: 61, 127, 251, 509, 1021, 2039 are good. 
*/ #ifndef IN_IFADDR_HASH_SIZE -#define IN_IFADDR_HASH_SIZE 509 /* 61, 127, 251, 509, 1021, 2039 are good */ -#endif -#ifndef IN_MULTI_HASH_SIZE -#define IN_MULTI_HASH_SIZE 509 /* 61, 127, 251, 509, 1021, 2039 are good */ +#define IN_IFADDR_HASH_SIZE 509 #endif /* @@ -123,20 +121,14 @@ struct in_aliasreq { */ #define IN_IFADDR_HASH(x) in_ifaddrhashtbl[(u_long)(x) % IN_IFADDR_HASH_SIZE] -#define IN_MULTI_HASH(x, ifp) \ - (in_multihashtbl[(u_long)((x) ^ (ifp->if_index)) % IN_MULTI_HASH_SIZE]) LIST_HEAD(in_ifaddrhashhead, in_ifaddr); /* Type of the hash head */ TAILQ_HEAD(in_ifaddrhead, in_ifaddr); /* Type of the list head */ -LIST_HEAD(in_multihashhead, in_multi); /* Type of the hash head */ extern u_long in_ifaddrhash; /* size of hash table - 1 */ extern struct in_ifaddrhashhead *in_ifaddrhashtbl; /* Hash table head */ extern struct in_ifaddrhead in_ifaddrhead; /* List head (in ip_input) */ -extern u_long in_multihash; /* size of hash table - 1 */ -extern struct in_multihashhead *in_multihashtbl; /* Hash table head */ - extern struct ifqueue ipintrq; /* ip packet input queue */ extern const int inetctlerrmap[]; @@ -203,21 +195,13 @@ extern const int inetctlerrmap[]; #endif /* - * Per-interface router version information. - */ -struct router_info { - LIST_ENTRY(router_info) rti_link; - struct ifnet *rti_ifp; - int rti_type; /* type of router on this interface */ - int rti_age; /* time since last v1 query */ -}; - -/* * Internet multicast address structure. There is one of these for each IP * multicast group to which this host belongs on a given network interface. * They are kept in a linked list, rooted in the interface's in_ifaddr * structure. 
*/ +struct router_info; + struct in_multi { LIST_ENTRY(in_multi) inm_list; /* list of multicast addresses */ struct router_info *inm_rti; /* router version info */ @@ -230,55 +214,24 @@ struct in_multi { #ifdef _KERNEL /* - * Structure used by macros below to remember position when stepping through - * all of the in_multi records. + * Structure used by functions below to remember position when stepping + * through all of the in_multi records. */ struct in_multistep { int i_n; struct in_multi *i_inm; }; -/* - * Macro for looking up the in_multi record for a given IP multicast address - * on a given interface. If no matching record is found, "inm" returns NULL. - */ -#define IN_LOOKUP_MULTI(addr, ifp, inm) \ - /* struct in_addr addr; */ \ - /* struct ifnet *ifp; */ \ - /* struct in_multi *inm; */ \ -{ \ - LIST_FOREACH((inm), &IN_MULTI_HASH(((addr).s_addr), (ifp)), inm_list) {\ - if (in_hosteq((inm)->inm_addr, (addr)) && \ - (inm)->inm_ifp == (ifp)) \ - break; \ - } \ -} - -/* - * Macro to step through all of the in_multi records, one at a time. - * The current position is remembered in "step", which the caller must - * provide. IN_FIRST_MULTI(), below, must be called to initialize "step" - * and get the first record. Both macros return a NULL "inm" when there - * are no remaining records. 
- */ -#define IN_NEXT_MULTI(step, inm) \ - /* struct in_multistep step; */ \ - /* struct in_multi *inm; */ \ -{ \ - while ((step).i_inm == NULL && (step).i_n < IN_MULTI_HASH_SIZE) \ - (step).i_inm = LIST_FIRST(&in_multihashtbl[++(step).i_n]); \ - if (((inm) = (step).i_inm) != NULL) \ - (step).i_inm = LIST_NEXT((inm), inm_list); \ -} - -#define IN_FIRST_MULTI(step, inm) \ - /* struct in_multistep step; */ \ - /* struct in_multi *inm; */ \ -{ \ - (step).i_n = 0; \ - (step).i_inm = LIST_FIRST(&in_multihashtbl[0]); \ - IN_NEXT_MULTI((step), (inm)); \ -} +bool in_multi_group(struct in_addr, struct ifnet *, int); +struct in_multi *in_first_multi(struct in_multistep *); +struct in_multi *in_next_multi(struct in_multistep *); +struct in_multi *in_lookup_multi(struct in_addr, struct ifnet *); +struct in_multi *in_addmulti(struct in_addr *, struct ifnet *); +void in_delmulti(struct in_multi *); + +void in_multi_lock(int); +void in_multi_unlock(void); +int in_multi_lock_held(void); struct ifaddr; @@ -287,8 +240,6 @@ int in_ifinit(struct ifnet *, void in_savemkludge(struct in_ifaddr *); void in_restoremkludge(struct in_ifaddr *, struct ifnet *); void in_purgemkludge(struct ifnet *); -struct in_multi *in_addmulti(struct in_addr *, struct ifnet *); -void in_delmulti(struct in_multi *); void in_ifscrub(struct ifnet *, struct in_ifaddr *); void in_setmaxmtu(void); const char *in_fmtaddr(struct in_addr); Index: src/sys/netinet/ip_carp.c diff -u src/sys/netinet/ip_carp.c:1.55 src/sys/netinet/ip_carp.c:1.56 --- src/sys/netinet/ip_carp.c:1.55 Sat May 17 20:44:24 2014 +++ src/sys/netinet/ip_carp.c Thu May 29 23:02:48 2014 @@ -1,4 +1,4 @@ -/* $NetBSD: ip_carp.c,v 1.55 2014/05/17 20:44:24 rmind Exp $ */ +/* $NetBSD: ip_carp.c,v 1.56 2014/05/29 23:02:48 rmind Exp $ */ /* $OpenBSD: ip_carp.c,v 1.113 2005/11/04 08:11:54 mcbride Exp $ */ /* @@ -31,7 +31,7 @@ #include "opt_mbuftrace.h" #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: ip_carp.c,v 1.55 2014/05/17 20:44:24 rmind Exp $"); 
+__KERNEL_RCSID(0, "$NetBSD: ip_carp.c,v 1.56 2014/05/29 23:02:48 rmind Exp $"); /* * TODO: @@ -1670,15 +1670,13 @@ carp_addr_updated(void *v) /* Handle a callback after SIOCDIFADDR */ if (new_naddrs < sc->sc_naddrs || new_naddrs6 < sc->sc_naddrs6) { struct in_addr mc_addr; - struct in_multi *inm; sc->sc_naddrs = new_naddrs; sc->sc_naddrs6 = new_naddrs6; /* Re-establish multicast membership removed by in_control */ mc_addr.s_addr = INADDR_CARP_GROUP; - IN_LOOKUP_MULTI(mc_addr, &sc->sc_if, inm); - if (inm == NULL) { + if (!in_multi_group(mc_addr, &sc->sc_if, 0)) { memset(&sc->sc_imo, 0, sizeof(sc->sc_imo)); if (sc->sc_carpdev != NULL && sc->sc_naddrs > 0) Index: src/sys/netinet/ip_input.c diff -u src/sys/netinet/ip_input.c:1.315 src/sys/netinet/ip_input.c:1.316 --- src/sys/netinet/ip_input.c:1.315 Wed May 28 19:19:33 2014 +++ src/sys/netinet/ip_input.c Thu May 29 23:02:48 2014 @@ -1,4 +1,4 @@ -/* $NetBSD: ip_input.c,v 1.315 2014/05/28 19:19:33 christos Exp $ */ +/* $NetBSD: ip_input.c,v 1.316 2014/05/29 23:02:48 rmind Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. @@ -91,7 +91,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: ip_input.c,v 1.315 2014/05/28 19:19:33 christos Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ip_input.c,v 1.316 2014/05/29 23:02:48 rmind Exp $"); #include "opt_inet.h" #include "opt_compat_netbsd.h" @@ -632,7 +632,6 @@ ip_input(struct mbuf *m) } } if (IN_MULTICAST(ip->ip_dst.s_addr)) { - struct in_multi *inm; #ifdef MROUTING extern struct socket *ip_mrouter; @@ -669,8 +668,7 @@ ip_input(struct mbuf *m) * See if we belong to the destination multicast group on the * arrival interface. 
*/ - IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm); - if (inm == NULL) { + if (!in_multi_group(ip->ip_dst, ifp, 0)) { IP_STATINC(IP_STAT_CANTFORWARD); m_freem(m); return; Index: src/sys/netinet/ip_output.c diff -u src/sys/netinet/ip_output.c:1.227 src/sys/netinet/ip_output.c:1.228 --- src/sys/netinet/ip_output.c:1.227 Fri May 23 00:02:14 2014 +++ src/sys/netinet/ip_output.c Thu May 29 23:02:48 2014 @@ -1,4 +1,4 @@ -/* $NetBSD: ip_output.c,v 1.227 2014/05/23 00:02:14 rmind Exp $ */ +/* $NetBSD: ip_output.c,v 1.228 2014/05/29 23:02:48 rmind Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. @@ -91,7 +91,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: ip_output.c,v 1.227 2014/05/23 00:02:14 rmind Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ip_output.c,v 1.228 2014/05/29 23:02:48 rmind Exp $"); #include "opt_inet.h" #include "opt_ipsec.h" @@ -278,7 +278,7 @@ ip_output(struct mbuf *m0, ...) if (IN_MULTICAST(ip->ip_dst.s_addr) || (ip->ip_dst.s_addr == INADDR_BROADCAST)) { - struct in_multi *inm; + bool inmgroup; m->m_flags |= (ip->ip_dst.s_addr == INADDR_BROADCAST) ? M_BCAST : M_MCAST; @@ -331,9 +331,8 @@ ip_output(struct mbuf *m0, ...) 
ip->ip_src = xia->ia_addr.sin_addr; } - IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm); - if (inm != NULL && - (imo == NULL || imo->imo_multicast_loop)) { + inmgroup = in_multi_group(ip->ip_dst, ifp, flags); + if (inmgroup && (imo == NULL || imo->imo_multicast_loop)) { /* * If we belong to the destination multicast group * on the outgoing interface, and the caller did not Index: src/sys/netinet/ip_var.h diff -u src/sys/netinet/ip_var.h:1.103 src/sys/netinet/ip_var.h:1.104 --- src/sys/netinet/ip_var.h:1.103 Fri May 23 19:35:24 2014 +++ src/sys/netinet/ip_var.h Thu May 29 23:02:48 2014 @@ -1,4 +1,4 @@ -/* $NetBSD: ip_var.h,v 1.103 2014/05/23 19:35:24 rmind Exp $ */ +/* $NetBSD: ip_var.h,v 1.104 2014/05/29 23:02:48 rmind Exp $ */ /* * Copyright (c) 1982, 1986, 1993 @@ -166,13 +166,20 @@ struct ip_moptions { #include "opt_mbuftrace.h" #endif -/* flags passed to ip_output as last parameter */ -#define IP_FORWARDING 0x1 /* most of ip header exists */ -#define IP_RAWOUTPUT 0x2 /* raw ip header exists */ -#define IP_RETURNMTU 0x4 /* pass back mtu on EMSGSIZE */ -#define IP_NOIPNEWID 0x8 /* don't fill in ip_id */ +/* + * The following flags can be passed to ip_output() as last parameter + */ +#define IP_FORWARDING 0x0001 /* most of ip header exists */ +#define IP_RAWOUTPUT 0x0002 /* raw ip header exists */ +#define IP_RETURNMTU 0x0004 /* pass back mtu on EMSGSIZE */ +#define IP_NOIPNEWID 0x0008 /* don't fill in ip_id */ + +CTASSERT(SO_DONTROUTE == 0x0010); +CTASSERT(SO_BROADCAST == 0x0020); #define IP_ROUTETOIF SO_DONTROUTE /* bypass routing tables */ #define IP_ALLOWBROADCAST SO_BROADCAST /* can send broadcast packets */ + +#define IP_IGMP_MCAST 0x0040 /* IGMP for mcast join/leave */ #define IP_MTUDISC 0x0400 /* Path MTU Discovery; set DF */ extern struct domain inetdomain;