Module Name: src
Committed By: rmind
Date: Thu May 29 23:02:48 UTC 2014
Modified Files:
src/sys/netinet: igmp.c igmp.h igmp_var.h in.c in_var.h ip_carp.c
ip_input.c ip_output.c ip_var.h
Log Message:
Make IGMP and multicast group management code MP-safe. Use a read-write
lock to protect the hash table of multicast address records; also, make it
private and eliminate some macros. In the long term, the lookup path ought
to be optimised.
To generate a diff of this commit:
cvs rdiff -u -r1.54 -r1.55 src/sys/netinet/igmp.c
cvs rdiff -u -r1.11 -r1.12 src/sys/netinet/igmp.h
cvs rdiff -u -r1.23 -r1.24 src/sys/netinet/igmp_var.h
cvs rdiff -u -r1.145 -r1.146 src/sys/netinet/in.c
cvs rdiff -u -r1.67 -r1.68 src/sys/netinet/in_var.h
cvs rdiff -u -r1.55 -r1.56 src/sys/netinet/ip_carp.c
cvs rdiff -u -r1.315 -r1.316 src/sys/netinet/ip_input.c
cvs rdiff -u -r1.227 -r1.228 src/sys/netinet/ip_output.c
cvs rdiff -u -r1.103 -r1.104 src/sys/netinet/ip_var.h
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/sys/netinet/igmp.c
diff -u src/sys/netinet/igmp.c:1.54 src/sys/netinet/igmp.c:1.55
--- src/sys/netinet/igmp.c:1.54 Tue Feb 25 18:30:12 2014
+++ src/sys/netinet/igmp.c Thu May 29 23:02:48 2014
@@ -1,4 +1,4 @@
-/* $NetBSD: igmp.c,v 1.54 2014/02/25 18:30:12 pooka Exp $ */
+/* $NetBSD: igmp.c,v 1.55 2014/05/29 23:02:48 rmind Exp $ */
/*
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
@@ -40,7 +40,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: igmp.c,v 1.54 2014/02/25 18:30:12 pooka Exp $");
+__KERNEL_RCSID(0, "$NetBSD: igmp.c,v 1.55 2014/05/29 23:02:48 rmind Exp $");
#include "opt_mrouting.h"
@@ -50,6 +50,7 @@ __KERNEL_RCSID(0, "$NetBSD: igmp.c,v 1.5
#include <sys/socketvar.h>
#include <sys/protosw.h>
#include <sys/systm.h>
+#include <sys/cprng.h>
#include <sys/sysctl.h>
#include <net/if.h>
@@ -64,83 +65,107 @@ __KERNEL_RCSID(0, "$NetBSD: igmp.c,v 1.5
#include <netinet/igmp.h>
#include <netinet/igmp_var.h>
-#define IP_MULTICASTOPTS 0
-
-static struct pool igmp_rti_pool;
+/*
+ * Per-interface router version information.
+ */
+typedef struct router_info {
+ LIST_ENTRY(router_info) rti_link;
+ ifnet_t * rti_ifp;
+ int rti_type; /* type of router on this interface */
+ int rti_age; /* time since last v1 query */
+} router_info_t;
-static percpu_t *igmpstat_percpu;
+/*
+ * The router-info list and the timer flag are protected by in_multilock.
+ *
+ * Lock order:
+ *
+ * softnet_lock ->
+ * in_multilock
+ */
+static struct pool igmp_rti_pool __cacheline_aligned;
+static LIST_HEAD(, router_info) rti_head __cacheline_aligned;
+static int igmp_timers_on __cacheline_aligned;
+static percpu_t * igmpstat_percpu __read_mostly;
#define IGMP_STATINC(x) _NET_STATINC(igmpstat_percpu, x)
-int igmp_timers_are_running;
-static LIST_HEAD(, router_info) rti_head = LIST_HEAD_INITIALIZER(rti_head);
-
-void igmp_sendpkt(struct in_multi *, int);
-static int rti_fill(struct in_multi *);
-static struct router_info *rti_find(struct ifnet *);
-static void rti_delete(struct ifnet *);
-
-static void sysctl_net_inet_igmp_setup(struct sysctllog **);
+static void igmp_sendpkt(struct in_multi *, int);
+static int rti_fill(struct in_multi *);
+static router_info_t * rti_find(struct ifnet *);
+static void rti_delete(struct ifnet *);
+static void sysctl_net_inet_igmp_setup(struct sysctllog **);
+/*
+ * rti_fill: associate router information with the given multicast group;
+ * if there is no router information for the interface, then create it.
+ */
static int
rti_fill(struct in_multi *inm)
{
- struct router_info *rti;
+ router_info_t *rti;
+
+ KASSERT(in_multi_lock_held());
- /* this function is called at splsoftnet() */
LIST_FOREACH(rti, &rti_head, rti_link) {
if (rti->rti_ifp == inm->inm_ifp) {
inm->inm_rti = rti;
- if (rti->rti_type == IGMP_v1_ROUTER)
- return (IGMP_v1_HOST_MEMBERSHIP_REPORT);
- else
- return (IGMP_v2_HOST_MEMBERSHIP_REPORT);
+ return rti->rti_type == IGMP_v1_ROUTER ?
+ IGMP_v1_HOST_MEMBERSHIP_REPORT :
+ IGMP_v2_HOST_MEMBERSHIP_REPORT;
}
}
-
rti = pool_get(&igmp_rti_pool, PR_NOWAIT);
- if (rti == NULL)
+ if (rti == NULL) {
return 0;
+ }
rti->rti_ifp = inm->inm_ifp;
rti->rti_type = IGMP_v2_ROUTER;
LIST_INSERT_HEAD(&rti_head, rti, rti_link);
inm->inm_rti = rti;
- return (IGMP_v2_HOST_MEMBERSHIP_REPORT);
+ return IGMP_v2_HOST_MEMBERSHIP_REPORT;
}
-static struct router_info *
-rti_find(struct ifnet *ifp)
+/*
+ * rti_find: look up or create router information for the given interface.
+ */
+static router_info_t *
+rti_find(ifnet_t *ifp)
{
- struct router_info *rti;
- int s = splsoftnet();
+ router_info_t *rti;
+
+ KASSERT(in_multi_lock_held());
LIST_FOREACH(rti, &rti_head, rti_link) {
if (rti->rti_ifp == ifp)
- return (rti);
+ return rti;
}
-
rti = pool_get(&igmp_rti_pool, PR_NOWAIT);
if (rti == NULL) {
- splx(s);
return NULL;
}
rti->rti_ifp = ifp;
rti->rti_type = IGMP_v2_ROUTER;
LIST_INSERT_HEAD(&rti_head, rti, rti_link);
- splx(s);
- return (rti);
+ return rti;
}
+/*
+ * rti_delete: remove and free the router information entry for the
+ * given interface.
+ */
static void
-rti_delete(struct ifnet *ifp) /* MUST be called at splsoftnet */
+rti_delete(ifnet_t *ifp)
{
- struct router_info *rti;
+ router_info_t *rti;
+
+ KASSERT(in_multi_lock_held());
LIST_FOREACH(rti, &rti_head, rti_link) {
if (rti->rti_ifp == ifp) {
LIST_REMOVE(rti, rti_link);
pool_put(&igmp_rti_pool, rti);
- return;
+ break;
}
}
}
@@ -148,29 +173,24 @@ rti_delete(struct ifnet *ifp) /* MUST be
void
igmp_init(void)
{
-
- sysctl_net_inet_igmp_setup(NULL);
- pool_init(&igmp_rti_pool, sizeof(struct router_info), 0, 0, 0,
+ pool_init(&igmp_rti_pool, sizeof(router_info_t), 0, 0, 0,
"igmppl", NULL, IPL_SOFTNET);
igmpstat_percpu = percpu_alloc(sizeof(uint64_t) * IGMP_NSTATS);
+ sysctl_net_inet_igmp_setup(NULL);
+ LIST_INIT(&rti_head);
}
void
igmp_input(struct mbuf *m, ...)
{
- int proto;
- int iphlen;
- struct ifnet *ifp = m->m_pkthdr.rcvif;
+ ifnet_t *ifp = m->m_pkthdr.rcvif;
struct ip *ip = mtod(m, struct ip *);
struct igmp *igmp;
- u_int minlen;
+ u_int minlen, timer;
struct in_multi *inm;
- struct in_multistep step;
- struct router_info *rti;
struct in_ifaddr *ia;
- u_int timer;
+ int proto, ip_len, iphlen;
va_list ap;
- u_int16_t ip_len;
va_start(ap, m);
iphlen = va_arg(ap, int);
@@ -222,11 +242,8 @@ igmp_input(struct mbuf *m, ...)
break;
if (igmp->igmp_code == 0) {
- rti = rti_find(ifp);
- if (rti == NULL)
- break;
- rti->rti_type = IGMP_v1_ROUTER;
- rti->rti_age = 0;
+ struct in_multistep step;
+ router_info_t *rti;
if (ip->ip_dst.s_addr != INADDR_ALLHOSTS_GROUP) {
IGMP_STATINC(IGMP_STAT_RCV_BADQUERIES);
@@ -234,13 +251,23 @@ igmp_input(struct mbuf *m, ...)
return;
}
+ in_multi_lock(RW_WRITER);
+ rti = rti_find(ifp);
+ if (rti == NULL) {
+ in_multi_unlock();
+ break;
+ }
+ rti->rti_type = IGMP_v1_ROUTER;
+ rti->rti_age = 0;
+
/*
* Start the timers in all of our membership records
* for the interface on which the query arrived,
* except those that are already running and those
* that belong to a "local" group (224.0.0.X).
*/
- IN_FIRST_MULTI(step, inm);
+
+ inm = in_first_multi(&step);
while (inm != NULL) {
if (inm->inm_ifp == ifp &&
inm->inm_timer == 0 &&
@@ -248,11 +275,14 @@ igmp_input(struct mbuf *m, ...)
inm->inm_state = IGMP_DELAYING_MEMBER;
inm->inm_timer = IGMP_RANDOM_DELAY(
IGMP_MAX_HOST_REPORT_DELAY * PR_FASTHZ);
- igmp_timers_are_running = 1;
+ igmp_timers_on = true;
}
- IN_NEXT_MULTI(step, inm);
+ inm = in_next_multi(&step);
}
+ in_multi_unlock();
} else {
+ struct in_multistep step;
+
if (!IN_MULTICAST(ip->ip_dst.s_addr)) {
IGMP_STATINC(IGMP_STAT_RCV_BADQUERIES);
m_freem(m);
@@ -261,7 +291,7 @@ igmp_input(struct mbuf *m, ...)
timer = igmp->igmp_code * PR_FASTHZ / IGMP_TIMER_SCALE;
if (timer == 0)
- timer =1;
+ timer = 1;
/*
* Start the timers in all of our membership records
@@ -271,7 +301,8 @@ igmp_input(struct mbuf *m, ...)
* timers already running, check if they need to be
* reset.
*/
- IN_FIRST_MULTI(step, inm);
+ in_multi_lock(RW_WRITER);
+ inm = in_first_multi(&step);
while (inm != NULL) {
if (inm->inm_ifp == ifp &&
!IN_LOCAL_GROUP(inm->inm_addr.s_addr) &&
@@ -289,7 +320,7 @@ igmp_input(struct mbuf *m, ...)
IGMP_DELAYING_MEMBER;
inm->inm_timer =
IGMP_RANDOM_DELAY(timer);
- igmp_timers_are_running = 1;
+ igmp_timers_on = true;
break;
case IGMP_SLEEPING_MEMBER:
inm->inm_state =
@@ -297,8 +328,9 @@ igmp_input(struct mbuf *m, ...)
break;
}
}
- IN_NEXT_MULTI(step, inm);
+ inm = in_next_multi(&step);
}
+ in_multi_unlock();
}
break;
@@ -335,7 +367,8 @@ igmp_input(struct mbuf *m, ...)
* If we belong to the group being reported, stop
* our timer for that group.
*/
- IN_LOOKUP_MULTI(igmp->igmp_group, ifp, inm);
+ in_multi_lock(RW_WRITER);
+ inm = in_lookup_multi(igmp->igmp_group, ifp);
if (inm != NULL) {
inm->inm_timer = 0;
IGMP_STATINC(IGMP_STAT_RCV_OURREPORTS);
@@ -355,7 +388,7 @@ igmp_input(struct mbuf *m, ...)
break;
}
}
-
+ in_multi_unlock();
break;
case IGMP_v2_HOST_MEMBERSHIP_REPORT:
@@ -403,7 +436,8 @@ igmp_input(struct mbuf *m, ...)
* If we belong to the group being reported, stop
* our timer for that group.
*/
- IN_LOOKUP_MULTI(igmp->igmp_group, ifp, inm);
+ in_multi_lock(RW_WRITER);
+ inm = in_lookup_multi(igmp->igmp_group, ifp);
if (inm != NULL) {
inm->inm_timer = 0;
IGMP_STATINC(IGMP_STAT_RCV_OURREPORTS);
@@ -419,7 +453,7 @@ igmp_input(struct mbuf *m, ...)
break;
}
}
-
+ in_multi_unlock();
break;
}
@@ -435,32 +469,32 @@ igmp_input(struct mbuf *m, ...)
int
igmp_joingroup(struct in_multi *inm)
{
- int report_type;
- int s = splsoftnet();
-
+ KASSERT(in_multi_lock_held());
inm->inm_state = IGMP_IDLE_MEMBER;
if (!IN_LOCAL_GROUP(inm->inm_addr.s_addr) &&
(inm->inm_ifp->if_flags & IFF_LOOPBACK) == 0) {
+ int report_type;
+
report_type = rti_fill(inm);
if (report_type == 0) {
- splx(s);
return ENOMEM;
}
igmp_sendpkt(inm, report_type);
inm->inm_state = IGMP_DELAYING_MEMBER;
inm->inm_timer = IGMP_RANDOM_DELAY(
IGMP_MAX_HOST_REPORT_DELAY * PR_FASTHZ);
- igmp_timers_are_running = 1;
+ igmp_timers_on = true;
} else
inm->inm_timer = 0;
- splx(s);
+
return 0;
}
void
igmp_leavegroup(struct in_multi *inm)
{
+ KASSERT(in_multi_lock_held());
switch (inm->inm_state) {
case IGMP_DELAYING_MEMBER:
@@ -487,14 +521,16 @@ igmp_fasttimo(void)
* Quick check to see if any work needs to be done, in order
* to minimize the overhead of fasttimo processing.
*/
- if (!igmp_timers_are_running)
+ if (!igmp_timers_on) {
return;
+ }
+ /* XXX: Needed for ip_output(). */
mutex_enter(softnet_lock);
- KERNEL_LOCK(1, NULL);
- igmp_timers_are_running = 0;
- IN_FIRST_MULTI(step, inm);
+ in_multi_lock(RW_WRITER);
+ igmp_timers_on = false;
+ inm = in_first_multi(&step);
while (inm != NULL) {
if (inm->inm_timer == 0) {
/* do nothing */
@@ -509,46 +545,47 @@ igmp_fasttimo(void)
inm->inm_state = IGMP_IDLE_MEMBER;
}
} else {
- igmp_timers_are_running = 1;
+ igmp_timers_on = true;
}
- IN_NEXT_MULTI(step, inm);
+ inm = in_next_multi(&step);
}
-
- KERNEL_UNLOCK_ONE(NULL);
+ in_multi_unlock();
mutex_exit(softnet_lock);
}
void
igmp_slowtimo(void)
{
- struct router_info *rti;
+ router_info_t *rti;
- mutex_enter(softnet_lock);
- KERNEL_LOCK(1, NULL);
+ in_multi_lock(RW_WRITER);
LIST_FOREACH(rti, &rti_head, rti_link) {
if (rti->rti_type == IGMP_v1_ROUTER &&
++rti->rti_age >= IGMP_AGE_THRESHOLD) {
rti->rti_type = IGMP_v2_ROUTER;
}
}
- KERNEL_UNLOCK_ONE(NULL);
- mutex_exit(softnet_lock);
+ in_multi_unlock();
}
-void
+/*
+ * igmp_sendpkt: construct an IGMP packet, given the multicast structure
+ * and the type, and send the datagram.
+ */
+static void
igmp_sendpkt(struct in_multi *inm, int type)
{
struct mbuf *m;
struct igmp *igmp;
struct ip *ip;
struct ip_moptions imo;
-#ifdef MROUTING
- extern struct socket *ip_mrouter;
-#endif /* MROUTING */
+
+ KASSERT(in_multi_lock_held());
MGETHDR(m, M_DONTWAIT, MT_HEADER);
if (m == NULL)
return;
+
/*
* Assume max_linkhdr + sizeof(struct ip) + IGMP_MINLEN
* is smaller than mbuf size returned by MGETHDR.
@@ -586,33 +623,38 @@ igmp_sendpkt(struct in_multi *inm, int t
* router, so that the process-level routing demon can hear it.
*/
#ifdef MROUTING
+ extern struct socket *ip_mrouter;
imo.imo_multicast_loop = (ip_mrouter != NULL);
#else
imo.imo_multicast_loop = 0;
-#endif /* MROUTING */
-
- ip_output(m, NULL, NULL, IP_MULTICASTOPTS, &imo, NULL);
+#endif
+ /*
+ * Note: IP_IGMP_MCAST indicates that in_multilock is held.
+ * The caller must still acquire softnet_lock for ip_output().
+ */
+ KASSERT(mutex_owned(softnet_lock));
+ ip_output(m, NULL, NULL, IP_IGMP_MCAST, &imo, NULL);
IGMP_STATINC(IGMP_STAT_SND_REPORTS);
}
void
-igmp_purgeif(struct ifnet *ifp) /* MUST be called at splsoftnet() */
+igmp_purgeif(ifnet_t *ifp)
{
- rti_delete(ifp); /* manipulates pools */
+ in_multi_lock(RW_WRITER);
+ rti_delete(ifp);
+ in_multi_unlock();
}
static int
sysctl_net_inet_igmp_stats(SYSCTLFN_ARGS)
{
-
- return (NETSTAT_SYSCTL(igmpstat_percpu, IGMP_NSTATS));
+ return NETSTAT_SYSCTL(igmpstat_percpu, IGMP_NSTATS);
}
static void
sysctl_net_inet_igmp_setup(struct sysctllog **clog)
{
-
sysctl_createv(clog, 0, NULL, NULL,
CTLFLAG_PERMANENT,
CTLTYPE_NODE, "inet", NULL,
@@ -624,7 +666,6 @@ sysctl_net_inet_igmp_setup(struct sysctl
SYSCTL_DESCR("Internet Group Management Protocol"),
NULL, 0, NULL, 0,
CTL_NET, PF_INET, IPPROTO_IGMP, CTL_EOL);
-
sysctl_createv(clog, 0, NULL, NULL,
CTLFLAG_PERMANENT,
CTLTYPE_STRUCT, "stats",
Index: src/sys/netinet/igmp.h
diff -u src/sys/netinet/igmp.h:1.11 src/sys/netinet/igmp.h:1.12
--- src/sys/netinet/igmp.h:1.11 Tue Dec 25 18:33:46 2007
+++ src/sys/netinet/igmp.h Thu May 29 23:02:48 2014
@@ -1,4 +1,4 @@
-/* $NetBSD: igmp.h,v 1.11 2007/12/25 18:33:46 perry Exp $ */
+/* $NetBSD: igmp.h,v 1.12 2014/05/29 23:02:48 rmind Exp $ */
/*
* Copyright (c) 1992, 1993
@@ -84,13 +84,13 @@
* IGMP packet format.
*/
struct igmp {
- u_int8_t igmp_type; /* version & type of IGMP message */
- u_int8_t igmp_code; /* code for routing sub-messages */
- u_int16_t igmp_cksum; /* IP-style checksum */
+ uint8_t igmp_type; /* version & type of IGMP message */
+ uint8_t igmp_code; /* code for routing sub-messages */
+ uint16_t igmp_cksum; /* IP-style checksum */
struct in_addr igmp_group; /* group address being reported */
} __packed; /* (zero for queries) */
-#define IGMP_MINLEN 8
+#define IGMP_MINLEN 8
#define IGMP_HOST_MEMBERSHIP_QUERY 0x11 /* membership query */
#define IGMP_v1_HOST_MEMBERSHIP_REPORT 0x12 /* v1 membership report */
Index: src/sys/netinet/igmp_var.h
diff -u src/sys/netinet/igmp_var.h:1.23 src/sys/netinet/igmp_var.h:1.24
--- src/sys/netinet/igmp_var.h:1.23 Tue Apr 15 16:02:03 2008
+++ src/sys/netinet/igmp_var.h Thu May 29 23:02:48 2014
@@ -1,4 +1,4 @@
-/* $NetBSD: igmp_var.h,v 1.23 2008/04/15 16:02:03 thorpej Exp $ */
+/* $NetBSD: igmp_var.h,v 1.24 2014/05/29 23:02:48 rmind Exp $ */
/*
* Copyright (c) 1992, 1993
@@ -103,7 +103,7 @@
* DELAY * countdown frequency). We assume that the routine random()
* is defined somewhere (and that it returns a positive number).
*/
-#define IGMP_RANDOM_DELAY(X) (random() % (X) + 1)
+#define IGMP_RANDOM_DELAY(X) (cprng_fast32() % (X) + 1)
#ifdef __NO_STRICT_ALIGNMENT
#define IGMP_HDR_ALIGNED_P(ig) 1
Index: src/sys/netinet/in.c
diff -u src/sys/netinet/in.c:1.145 src/sys/netinet/in.c:1.146
--- src/sys/netinet/in.c:1.145 Thu May 22 22:01:12 2014
+++ src/sys/netinet/in.c Thu May 29 23:02:48 2014
@@ -1,4 +1,4 @@
-/* $NetBSD: in.c,v 1.145 2014/05/22 22:01:12 rmind Exp $ */
+/* $NetBSD: in.c,v 1.146 2014/05/29 23:02:48 rmind Exp $ */
/*
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
@@ -91,7 +91,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: in.c,v 1.145 2014/05/22 22:01:12 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: in.c,v 1.146 2014/05/29 23:02:48 rmind Exp $");
#include "opt_inet.h"
#include "opt_inet_conf.h"
@@ -149,6 +149,11 @@ static void in_sysctl_init(struct sysctl
#define HOSTZEROBROADCAST 1
#endif
+/* Note: 61, 127, 251, 509, 1021, 2039 are good. */
+#ifndef IN_MULTI_HASH_SIZE
+#define IN_MULTI_HASH_SIZE 509
+#endif
+
static int subnetsarelocal = SUBNETSARELOCAL;
static int hostzeroisbroadcast = HOSTZEROBROADCAST;
@@ -157,13 +162,18 @@ static int hostzeroisbroadcast = HOSTZ
* deleted interface addresses. We use in_ifaddr so that a chain head
* won't be deallocated until all multicast address record are deleted.
*/
-static TAILQ_HEAD(, in_ifaddr) in_mk = TAILQ_HEAD_INITIALIZER(in_mk);
+
+LIST_HEAD(in_multihashhead, in_multi); /* Type of the hash head */
static struct pool inmulti_pool;
static u_int in_multientries;
-struct in_multihashhead * in_multihashtbl;
+static struct in_multihashhead *in_multihashtbl;
+static u_long in_multihash;
+static krwlock_t in_multilock;
+
+#define IN_MULTI_HASH(x, ifp) \
+ (in_multihashtbl[(u_long)((x) ^ (ifp->if_index)) % IN_MULTI_HASH_SIZE])
-u_long in_multihash;
struct in_ifaddrhashhead * in_ifaddrhashtbl;
u_long in_ifaddrhash;
struct in_ifaddrhead in_ifaddrhead;
@@ -179,6 +189,7 @@ in_init(void)
&in_ifaddrhash);
in_multihashtbl = hashinit(IN_IFADDR_HASH_SIZE, HASH_LIST, true,
&in_multihash);
+ rw_init(&in_multilock);
in_sysctl_init(NULL);
}
@@ -1067,64 +1078,106 @@ in_broadcast(struct in_addr in, struct i
}
/*
+ * in_lookup_multi: look up the in_multi record for a given IP
+ * multicast address on a given interface. If no matching record is
+ * found, return NULL.
+ */
+struct in_multi *
+in_lookup_multi(struct in_addr addr, ifnet_t *ifp)
+{
+ struct in_multi *inm;
+
+ KASSERT(rw_lock_held(&in_multilock));
+
+ LIST_FOREACH(inm, &IN_MULTI_HASH(addr.s_addr, ifp), inm_list) {
+ if (in_hosteq(inm->inm_addr, addr) && inm->inm_ifp == ifp)
+ break;
+ }
+ return inm;
+}
+
+/*
+ * in_multi_group: check whether the address is an IP multicast group
+ * that we have joined on this interface. Returns true or false.
+ */
+bool
+in_multi_group(struct in_addr addr, ifnet_t *ifp, int flags)
+{
+ bool ingroup;
+
+ if (__predict_true(flags & IP_IGMP_MCAST) == 0) {
+ rw_enter(&in_multilock, RW_READER);
+ ingroup = in_lookup_multi(addr, ifp) != NULL;
+ rw_exit(&in_multilock);
+ } else {
+ /* XXX Recursive call from ip_output(). */
+ KASSERT(rw_lock_held(&in_multilock));
+ ingroup = in_lookup_multi(addr, ifp) != NULL;
+ }
+ return ingroup;
+}
+
+/*
* Add an address to the list of IP multicast addresses for a given interface.
*/
struct in_multi *
-in_addmulti(struct in_addr *ap, struct ifnet *ifp)
+in_addmulti(struct in_addr *ap, ifnet_t *ifp)
{
struct sockaddr_in sin;
struct in_multi *inm;
- int s = splsoftnet();
/*
* See if address already in list.
*/
- IN_LOOKUP_MULTI(*ap, ifp, inm);
+ rw_enter(&in_multilock, RW_WRITER);
+ inm = in_lookup_multi(*ap, ifp);
if (inm != NULL) {
/*
* Found it; just increment the reference count.
*/
- ++inm->inm_refcount;
- } else {
- /*
- * New address; allocate a new multicast record
- * and link it into the interface's multicast list.
- */
- inm = pool_get(&inmulti_pool, PR_NOWAIT);
- if (inm == NULL) {
- splx(s);
- return (NULL);
- }
- inm->inm_addr = *ap;
- inm->inm_ifp = ifp;
- inm->inm_refcount = 1;
- LIST_INSERT_HEAD(
- &IN_MULTI_HASH(inm->inm_addr.s_addr, ifp),
- inm, inm_list);
- /*
- * Ask the network driver to update its multicast reception
- * filter appropriately for the new address.
- */
- sockaddr_in_init(&sin, ap, 0);
- if (if_mcast_op(ifp, SIOCADDMULTI, sintosa(&sin)) != 0) {
- LIST_REMOVE(inm, inm_list);
- pool_put(&inmulti_pool, inm);
- splx(s);
- return (NULL);
- }
- /*
- * Let IGMP know that we have joined a new IP multicast group.
- */
- if (igmp_joingroup(inm) != 0) {
- LIST_REMOVE(inm, inm_list);
- pool_put(&inmulti_pool, inm);
- splx(s);
- return (NULL);
- }
- in_multientries++;
+ inm->inm_refcount++;
+ rw_exit(&in_multilock);
+ return inm;
}
- splx(s);
- return (inm);
+
+ /*
+ * New address; allocate a new multicast record.
+ */
+ inm = pool_get(&inmulti_pool, PR_NOWAIT);
+ if (inm == NULL) {
+ rw_exit(&in_multilock);
+ return NULL;
+ }
+ inm->inm_addr = *ap;
+ inm->inm_ifp = ifp;
+ inm->inm_refcount = 1;
+
+ /*
+ * Ask the network driver to update its multicast reception
+ * filter appropriately for the new address.
+ */
+ sockaddr_in_init(&sin, ap, 0);
+ if (if_mcast_op(ifp, SIOCADDMULTI, sintosa(&sin)) != 0) {
+ rw_exit(&in_multilock);
+ pool_put(&inmulti_pool, inm);
+ return NULL;
+ }
+
+ /*
+ * Let IGMP know that we have joined a new IP multicast group.
+ */
+ if (igmp_joingroup(inm) != 0) {
+ rw_exit(&in_multilock);
+ pool_put(&inmulti_pool, inm);
+ return NULL;
+ }
+ LIST_INSERT_HEAD(
+ &IN_MULTI_HASH(inm->inm_addr.s_addr, ifp),
+ inm, inm_list);
+ in_multientries++;
+ rw_exit(&in_multilock);
+
+ return inm;
}
/*
@@ -1134,28 +1187,85 @@ void
in_delmulti(struct in_multi *inm)
{
struct sockaddr_in sin;
- int s = splsoftnet();
- if (--inm->inm_refcount == 0) {
- /*
- * No remaining claims to this record; let IGMP know that
- * we are leaving the multicast group.
- */
- igmp_leavegroup(inm);
- /*
- * Unlink from list.
- */
- LIST_REMOVE(inm, inm_list);
- in_multientries--;
- /*
- * Notify the network driver to update its multicast reception
- * filter.
- */
- sockaddr_in_init(&sin, &inm->inm_addr, 0);
- if_mcast_op(inm->inm_ifp, SIOCDELMULTI, sintosa(&sin));
- pool_put(&inmulti_pool, inm);
+ rw_enter(&in_multilock, RW_WRITER);
+ if (--inm->inm_refcount > 0) {
+ rw_exit(&in_multilock);
+ return;
}
- splx(s);
+
+ /*
+ * No remaining claims to this record; let IGMP know that
+ * we are leaving the multicast group.
+ */
+ igmp_leavegroup(inm);
+
+ /*
+ * Notify the network driver to update its multicast reception
+ * filter.
+ */
+ sockaddr_in_init(&sin, &inm->inm_addr, 0);
+ if_mcast_op(inm->inm_ifp, SIOCDELMULTI, sintosa(&sin));
+
+ /*
+ * Unlink from list.
+ */
+ LIST_REMOVE(inm, inm_list);
+ in_multientries--;
+ rw_exit(&in_multilock);
+
+ pool_put(&inmulti_pool, inm);
+}
+
+/*
+ * in_next_multi: step through all of the in_multi records, one at a time.
+ * The current position is remembered in "step", which the caller must
+ * provide. in_first_multi(), below, must be called to initialize "step"
+ * and get the first record. Both functions return NULL when there
+ * are no remaining records.
+ */
+struct in_multi *
+in_next_multi(struct in_multistep *step)
+{
+ struct in_multi *inm;
+
+ KASSERT(rw_lock_held(&in_multilock));
+
+ while (step->i_inm == NULL && step->i_n < IN_MULTI_HASH_SIZE) {
+ step->i_inm = LIST_FIRST(&in_multihashtbl[++step->i_n]);
+ }
+ if ((inm = step->i_inm) != NULL) {
+ step->i_inm = LIST_NEXT(inm, inm_list);
+ }
+ return inm;
+}
+
+struct in_multi *
+in_first_multi(struct in_multistep *step)
+{
+ KASSERT(rw_lock_held(&in_multilock));
+
+ step->i_n = 0;
+ step->i_inm = LIST_FIRST(&in_multihashtbl[0]);
+ return in_next_multi(step);
+}
+
+void
+in_multi_lock(int op)
+{
+ rw_enter(&in_multilock, op);
+}
+
+void
+in_multi_unlock(void)
+{
+ rw_exit(&in_multilock);
+}
+
+int
+in_multi_lock_held(void)
+{
+ return rw_lock_held(&in_multilock);
}
struct sockaddr_in *
Index: src/sys/netinet/in_var.h
diff -u src/sys/netinet/in_var.h:1.67 src/sys/netinet/in_var.h:1.68
--- src/sys/netinet/in_var.h:1.67 Fri May 23 19:27:48 2014
+++ src/sys/netinet/in_var.h Thu May 29 23:02:48 2014
@@ -1,4 +1,4 @@
-/* $NetBSD: in_var.h,v 1.67 2014/05/23 19:27:48 rmind Exp $ */
+/* $NetBSD: in_var.h,v 1.68 2014/05/29 23:02:48 rmind Exp $ */
/*-
* Copyright (c) 1998 The NetBSD Foundation, Inc.
@@ -107,13 +107,11 @@ struct in_aliasreq {
*/
#define IA_SIN(ia) (&(((struct in_ifaddr *)(ia))->ia_addr))
+#ifdef _KERNEL
-#ifdef _KERNEL
+/* Note: 61, 127, 251, 509, 1021, 2039 are good. */
#ifndef IN_IFADDR_HASH_SIZE
-#define IN_IFADDR_HASH_SIZE 509 /* 61, 127, 251, 509, 1021, 2039 are good */
-#endif
-#ifndef IN_MULTI_HASH_SIZE
-#define IN_MULTI_HASH_SIZE 509 /* 61, 127, 251, 509, 1021, 2039 are good */
+#define IN_IFADDR_HASH_SIZE 509
#endif
/*
@@ -123,20 +121,14 @@ struct in_aliasreq {
*/
#define IN_IFADDR_HASH(x) in_ifaddrhashtbl[(u_long)(x) % IN_IFADDR_HASH_SIZE]
-#define IN_MULTI_HASH(x, ifp) \
- (in_multihashtbl[(u_long)((x) ^ (ifp->if_index)) % IN_MULTI_HASH_SIZE])
LIST_HEAD(in_ifaddrhashhead, in_ifaddr); /* Type of the hash head */
TAILQ_HEAD(in_ifaddrhead, in_ifaddr); /* Type of the list head */
-LIST_HEAD(in_multihashhead, in_multi); /* Type of the hash head */
extern u_long in_ifaddrhash; /* size of hash table - 1 */
extern struct in_ifaddrhashhead *in_ifaddrhashtbl; /* Hash table head */
extern struct in_ifaddrhead in_ifaddrhead; /* List head (in ip_input) */
-extern u_long in_multihash; /* size of hash table - 1 */
-extern struct in_multihashhead *in_multihashtbl; /* Hash table head */
-
extern struct ifqueue ipintrq; /* ip packet input queue */
extern const int inetctlerrmap[];
@@ -203,21 +195,13 @@ extern const int inetctlerrmap[];
#endif
/*
- * Per-interface router version information.
- */
-struct router_info {
- LIST_ENTRY(router_info) rti_link;
- struct ifnet *rti_ifp;
- int rti_type; /* type of router on this interface */
- int rti_age; /* time since last v1 query */
-};
-
-/*
* Internet multicast address structure. There is one of these for each IP
* multicast group to which this host belongs on a given network interface.
* They are kept in a linked list, rooted in the interface's in_ifaddr
* structure.
*/
+struct router_info;
+
struct in_multi {
LIST_ENTRY(in_multi) inm_list; /* list of multicast addresses */
struct router_info *inm_rti; /* router version info */
@@ -230,55 +214,24 @@ struct in_multi {
#ifdef _KERNEL
/*
- * Structure used by macros below to remember position when stepping through
- * all of the in_multi records.
+ * Structure used by functions below to remember position when stepping
+ * through all of the in_multi records.
*/
struct in_multistep {
int i_n;
struct in_multi *i_inm;
};
-/*
- * Macro for looking up the in_multi record for a given IP multicast address
- * on a given interface. If no matching record is found, "inm" returns NULL.
- */
-#define IN_LOOKUP_MULTI(addr, ifp, inm) \
- /* struct in_addr addr; */ \
- /* struct ifnet *ifp; */ \
- /* struct in_multi *inm; */ \
-{ \
- LIST_FOREACH((inm), &IN_MULTI_HASH(((addr).s_addr), (ifp)), inm_list) {\
- if (in_hosteq((inm)->inm_addr, (addr)) && \
- (inm)->inm_ifp == (ifp)) \
- break; \
- } \
-}
-
-/*
- * Macro to step through all of the in_multi records, one at a time.
- * The current position is remembered in "step", which the caller must
- * provide. IN_FIRST_MULTI(), below, must be called to initialize "step"
- * and get the first record. Both macros return a NULL "inm" when there
- * are no remaining records.
- */
-#define IN_NEXT_MULTI(step, inm) \
- /* struct in_multistep step; */ \
- /* struct in_multi *inm; */ \
-{ \
- while ((step).i_inm == NULL && (step).i_n < IN_MULTI_HASH_SIZE) \
- (step).i_inm = LIST_FIRST(&in_multihashtbl[++(step).i_n]); \
- if (((inm) = (step).i_inm) != NULL) \
- (step).i_inm = LIST_NEXT((inm), inm_list); \
-}
-
-#define IN_FIRST_MULTI(step, inm) \
- /* struct in_multistep step; */ \
- /* struct in_multi *inm; */ \
-{ \
- (step).i_n = 0; \
- (step).i_inm = LIST_FIRST(&in_multihashtbl[0]); \
- IN_NEXT_MULTI((step), (inm)); \
-}
+bool in_multi_group(struct in_addr, struct ifnet *, int);
+struct in_multi *in_first_multi(struct in_multistep *);
+struct in_multi *in_next_multi(struct in_multistep *);
+struct in_multi *in_lookup_multi(struct in_addr, struct ifnet *);
+struct in_multi *in_addmulti(struct in_addr *, struct ifnet *);
+void in_delmulti(struct in_multi *);
+
+void in_multi_lock(int);
+void in_multi_unlock(void);
+int in_multi_lock_held(void);
struct ifaddr;
@@ -287,8 +240,6 @@ int in_ifinit(struct ifnet *,
void in_savemkludge(struct in_ifaddr *);
void in_restoremkludge(struct in_ifaddr *, struct ifnet *);
void in_purgemkludge(struct ifnet *);
-struct in_multi *in_addmulti(struct in_addr *, struct ifnet *);
-void in_delmulti(struct in_multi *);
void in_ifscrub(struct ifnet *, struct in_ifaddr *);
void in_setmaxmtu(void);
const char *in_fmtaddr(struct in_addr);
Index: src/sys/netinet/ip_carp.c
diff -u src/sys/netinet/ip_carp.c:1.55 src/sys/netinet/ip_carp.c:1.56
--- src/sys/netinet/ip_carp.c:1.55 Sat May 17 20:44:24 2014
+++ src/sys/netinet/ip_carp.c Thu May 29 23:02:48 2014
@@ -1,4 +1,4 @@
-/* $NetBSD: ip_carp.c,v 1.55 2014/05/17 20:44:24 rmind Exp $ */
+/* $NetBSD: ip_carp.c,v 1.56 2014/05/29 23:02:48 rmind Exp $ */
/* $OpenBSD: ip_carp.c,v 1.113 2005/11/04 08:11:54 mcbride Exp $ */
/*
@@ -31,7 +31,7 @@
#include "opt_mbuftrace.h"
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ip_carp.c,v 1.55 2014/05/17 20:44:24 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ip_carp.c,v 1.56 2014/05/29 23:02:48 rmind Exp $");
/*
* TODO:
@@ -1670,15 +1670,13 @@ carp_addr_updated(void *v)
/* Handle a callback after SIOCDIFADDR */
if (new_naddrs < sc->sc_naddrs || new_naddrs6 < sc->sc_naddrs6) {
struct in_addr mc_addr;
- struct in_multi *inm;
sc->sc_naddrs = new_naddrs;
sc->sc_naddrs6 = new_naddrs6;
/* Re-establish multicast membership removed by in_control */
mc_addr.s_addr = INADDR_CARP_GROUP;
- IN_LOOKUP_MULTI(mc_addr, &sc->sc_if, inm);
- if (inm == NULL) {
+ if (!in_multi_group(mc_addr, &sc->sc_if, 0)) {
memset(&sc->sc_imo, 0, sizeof(sc->sc_imo));
if (sc->sc_carpdev != NULL && sc->sc_naddrs > 0)
Index: src/sys/netinet/ip_input.c
diff -u src/sys/netinet/ip_input.c:1.315 src/sys/netinet/ip_input.c:1.316
--- src/sys/netinet/ip_input.c:1.315 Wed May 28 19:19:33 2014
+++ src/sys/netinet/ip_input.c Thu May 29 23:02:48 2014
@@ -1,4 +1,4 @@
-/* $NetBSD: ip_input.c,v 1.315 2014/05/28 19:19:33 christos Exp $ */
+/* $NetBSD: ip_input.c,v 1.316 2014/05/29 23:02:48 rmind Exp $ */
/*
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
@@ -91,7 +91,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ip_input.c,v 1.315 2014/05/28 19:19:33 christos Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ip_input.c,v 1.316 2014/05/29 23:02:48 rmind Exp $");
#include "opt_inet.h"
#include "opt_compat_netbsd.h"
@@ -632,7 +632,6 @@ ip_input(struct mbuf *m)
}
}
if (IN_MULTICAST(ip->ip_dst.s_addr)) {
- struct in_multi *inm;
#ifdef MROUTING
extern struct socket *ip_mrouter;
@@ -669,8 +668,7 @@ ip_input(struct mbuf *m)
* See if we belong to the destination multicast group on the
* arrival interface.
*/
- IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm);
- if (inm == NULL) {
+ if (!in_multi_group(ip->ip_dst, ifp, 0)) {
IP_STATINC(IP_STAT_CANTFORWARD);
m_freem(m);
return;
Index: src/sys/netinet/ip_output.c
diff -u src/sys/netinet/ip_output.c:1.227 src/sys/netinet/ip_output.c:1.228
--- src/sys/netinet/ip_output.c:1.227 Fri May 23 00:02:14 2014
+++ src/sys/netinet/ip_output.c Thu May 29 23:02:48 2014
@@ -1,4 +1,4 @@
-/* $NetBSD: ip_output.c,v 1.227 2014/05/23 00:02:14 rmind Exp $ */
+/* $NetBSD: ip_output.c,v 1.228 2014/05/29 23:02:48 rmind Exp $ */
/*
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
@@ -91,7 +91,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ip_output.c,v 1.227 2014/05/23 00:02:14 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ip_output.c,v 1.228 2014/05/29 23:02:48 rmind Exp $");
#include "opt_inet.h"
#include "opt_ipsec.h"
@@ -278,7 +278,7 @@ ip_output(struct mbuf *m0, ...)
if (IN_MULTICAST(ip->ip_dst.s_addr) ||
(ip->ip_dst.s_addr == INADDR_BROADCAST)) {
- struct in_multi *inm;
+ bool inmgroup;
m->m_flags |= (ip->ip_dst.s_addr == INADDR_BROADCAST) ?
M_BCAST : M_MCAST;
@@ -331,9 +331,8 @@ ip_output(struct mbuf *m0, ...)
ip->ip_src = xia->ia_addr.sin_addr;
}
- IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm);
- if (inm != NULL &&
- (imo == NULL || imo->imo_multicast_loop)) {
+ inmgroup = in_multi_group(ip->ip_dst, ifp, flags);
+ if (inmgroup && (imo == NULL || imo->imo_multicast_loop)) {
/*
* If we belong to the destination multicast group
* on the outgoing interface, and the caller did not
Index: src/sys/netinet/ip_var.h
diff -u src/sys/netinet/ip_var.h:1.103 src/sys/netinet/ip_var.h:1.104
--- src/sys/netinet/ip_var.h:1.103 Fri May 23 19:35:24 2014
+++ src/sys/netinet/ip_var.h Thu May 29 23:02:48 2014
@@ -1,4 +1,4 @@
-/* $NetBSD: ip_var.h,v 1.103 2014/05/23 19:35:24 rmind Exp $ */
+/* $NetBSD: ip_var.h,v 1.104 2014/05/29 23:02:48 rmind Exp $ */
/*
* Copyright (c) 1982, 1986, 1993
@@ -166,13 +166,20 @@ struct ip_moptions {
#include "opt_mbuftrace.h"
#endif
-/* flags passed to ip_output as last parameter */
-#define IP_FORWARDING 0x1 /* most of ip header exists */
-#define IP_RAWOUTPUT 0x2 /* raw ip header exists */
-#define IP_RETURNMTU 0x4 /* pass back mtu on EMSGSIZE */
-#define IP_NOIPNEWID 0x8 /* don't fill in ip_id */
+/*
+ * The following flags can be passed to ip_output() as last parameter
+ */
+#define IP_FORWARDING 0x0001 /* most of ip header exists */
+#define IP_RAWOUTPUT 0x0002 /* raw ip header exists */
+#define IP_RETURNMTU 0x0004 /* pass back mtu on EMSGSIZE */
+#define IP_NOIPNEWID 0x0008 /* don't fill in ip_id */
+
+CTASSERT(SO_DONTROUTE == 0x0010);
+CTASSERT(SO_BROADCAST == 0x0020);
#define IP_ROUTETOIF SO_DONTROUTE /* bypass routing tables */
#define IP_ALLOWBROADCAST SO_BROADCAST /* can send broadcast packets */
+
+#define IP_IGMP_MCAST 0x0040 /* IGMP for mcast join/leave */
#define IP_MTUDISC 0x0400 /* Path MTU Discovery; set DF */
extern struct domain inetdomain;