Author: bms
Date: Mon Mar  9 17:53:05 2009
New Revision: 189592
URL: http://svn.freebsd.org/changeset/base/189592

Log:
  Merge IGMPv3 and Source-Specific Multicast (SSM) to the FreeBSD
  IPv4 stack.
  
  Diffs are minimized against p4.
  PCS has been used for some protocol verification; more widespread
  testing of recorded sources in Group-and-Source queries is needed.
  sizeof(struct igmpstat) has changed.
  
  __FreeBSD_version is bumped to 800070.

Modified:
  head/UPDATING
  head/share/man/man4/Makefile
  head/share/man/man4/ip.4
  head/share/man/man4/multicast.4
  head/sys/netinet/if_ether.c
  head/sys/netinet/igmp.c
  head/sys/netinet/igmp_var.h
  head/sys/netinet/in.c
  head/sys/netinet/in.h
  head/sys/netinet/in_mcast.c
  head/sys/netinet/in_proto.c
  head/sys/netinet/in_var.h
  head/sys/netinet/ip_input.c
  head/sys/netinet/ip_var.h
  head/sys/netinet/raw_ip.c
  head/sys/netinet/udp_usrreq.c
  head/sys/netinet/vinet.h
  head/sys/sys/param.h
  head/sys/sys/vimage.h
  head/usr.bin/netstat/inet.c
  head/usr.sbin/ifmcstat/Makefile
  head/usr.sbin/ifmcstat/ifmcstat.8
  head/usr.sbin/ifmcstat/ifmcstat.c

Modified: head/UPDATING
==============================================================================
--- head/UPDATING       Mon Mar  9 17:42:34 2009        (r189591)
+++ head/UPDATING       Mon Mar  9 17:53:05 2009        (r189592)
@@ -23,6 +23,45 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 8.
        ln -s aj /etc/malloc.conf.)
 
 20090309:
+       IGMPv3 and Source-Specific Multicast (SSM) have been merged
+       to the IPv4 stack. VIMAGE hooks are in but not yet used.
+
+       For kernel developers, the most important changes are that the
+       ip_output() and ip_input() paths no longer take the IN_MULTI_LOCK(),
+       and this lock has been downgraded to a non-recursive mutex.
+
+       Transport protocols (UDP, Raw IP) are now responsible for filtering
+       inbound multicast traffic according to group membership and source
+       filters. The imo_multicast_filter() KPI exists for this purpose.
+       Transports which do not use multicast (SCTP, TCP) already reject
+       multicast by default. Forwarding and receive performance may improve
+       as a mutex acquisition is no longer needed in the ip_input()
+       low-level input path.  in_addmulti() and in_delmulti() are shimmed
+       to new KPIs which exist to support SSM in-kernel.
+
+       For application developers, it is recommended that loopback of
+       multicast datagrams be disabled for best performance, as this
+       will still cause the lock to be taken for each looped-back
+       datagram transmission. The net.inet.ip.mcast.loop sysctl may
+       be tuned to 0 to disable loopback by default; it defaults to 1
+       to preserve the existing behaviour.
+
+       For systems administrators, to obtain best performance with
+       multicast reception and multiple groups, it is always recommended
+       that a card with a suitably precise hash filter is used. Hash
+       collisions will still result in the lock being taken within the
+       transport protocol input path to check group membership.
+
+       If deploying FreeBSD in an environment with IGMP snooping switches,
+       it is recommended that the net.inet.igmp.sendlocal sysctl remain
+       enabled; this forces 224.0.0.0/24 group membership to be announced
+       via IGMP.
+
+       The size of 'struct igmpstat' has changed; netstat needs to be
+       recompiled to reflect this.
+       Bump __FreeBSD_version to 800070.
+
+20090309:
        libusb20.so.1 is now installed as libusb.so.1 and the ports system
        updated to use it. This requires a buildworld/installworld in order to
        update the library and dependencies (usbconfig, etc). Its advisable to

Modified: head/share/man/man4/Makefile
==============================================================================
--- head/share/man/man4/Makefile        Mon Mar  9 17:42:34 2009        
(r189591)
+++ head/share/man/man4/Makefile        Mon Mar  9 17:53:05 2009        
(r189592)
@@ -128,6 +128,7 @@ MAN=        aac.4 \
        if_bridge.4 \
        ifmib.4 \
        igb.4 \
+       igmp.4 \
        iic.4 \
        iicbb.4 \
        iicbus.4 \

Modified: head/share/man/man4/ip.4
==============================================================================
--- head/share/man/man4/ip.4    Mon Mar  9 17:42:34 2009        (r189591)
+++ head/share/man/man4/ip.4    Mon Mar  9 17:53:05 2009        (r189592)
@@ -32,7 +32,7 @@
 .\"     @(#)ip.4       8.2 (Berkeley) 11/30/93
 .\" $FreeBSD$
 .\"
-.Dd February 13, 2009
+.Dd March 9, 2009
 .Dt IP 4
 .Os
 .Sh NAME
@@ -466,13 +466,19 @@ setsockopt(s, IPPROTO_IP, IP_MULTICAST_L
 .Pp
 This option
 improves performance for applications that may have no more than one
-instance on a single host (such as a router daemon), by eliminating
+instance on a single host (such as a routing daemon), by eliminating
 the overhead of receiving their own transmissions.
 It should generally not
 be used by applications for which there may be more than one instance on a
 single host (such as a conferencing program) or for which the sender does
 not belong to the destination group (such as a time querying program).
 .Pp
+The sysctl setting
+.Va net.inet.ip.mcast.loop
+controls the default setting of the
+.Dv IP_MULTICAST_LOOP
+socket option for new sockets.
+.Pp
 A multicast datagram sent with an initial TTL greater than 1 may be delivered
 to the sending host on a different interface from that on which it was sent,
 if the host belongs to the destination group on that other interface.
@@ -650,6 +656,13 @@ documented in RFC 3678.
 For management of source filter lists using this API,
 please refer to
 .Xr sourcefilter 3 .
+.Pp
+The sysctl settings
+.Va net.inet.ip.mcast.maxsocksrc
+and
+.Va net.inet.ip.mcast.maxgrpsrc
+are used to specify an upper limit on the number of per-socket and per-group
+source filter entries which the kernel may allocate.
 .\"-----------------------
 .Ss "Raw IP Sockets"
 .Pp
@@ -795,6 +808,7 @@ field was not equal to the length of the
 .Xr send 2 ,
 .Xr byteorder 3 ,
 .Xr icmp 4 ,
+.Xr igmp 4 ,
 .Xr inet 4 ,
 .Xr intro 4 ,
 .Xr multicast 4 ,

Modified: head/share/man/man4/multicast.4
==============================================================================
--- head/share/man/man4/multicast.4     Mon Mar  9 17:42:34 2009        
(r189591)
+++ head/share/man/man4/multicast.4     Mon Mar  9 17:53:05 2009        
(r189592)
@@ -956,6 +956,7 @@ after the previous upcall.
 .Xr socket 2 ,
 .Xr sourcefilter 3 ,
 .Xr icmp6 4 ,
+.Xr igmp 4 ,
 .Xr inet 4 ,
 .Xr inet6 4 ,
 .Xr intro 4 ,

Modified: head/sys/netinet/if_ether.c
==============================================================================
--- head/sys/netinet/if_ether.c Mon Mar  9 17:42:34 2009        (r189591)
+++ head/sys/netinet/if_ether.c Mon Mar  9 17:53:05 2009        (r189592)
@@ -81,7 +81,8 @@ __FBSDID("$FreeBSD$");
 
 #define SIN(s) ((struct sockaddr_in *)s)
 #define SDL(s) ((struct sockaddr_dl *)s)
-#define LLTABLE(ifp)   ((struct lltable *)(ifp)->if_afdata[AF_INET])
+#define LLTABLE(ifp)   \
+       ((struct in_ifinfo *)(ifp)->if_afdata[AF_INET])->ii_llt
 
 SYSCTL_DECL(_net_link_ether);
 SYSCTL_NODE(_net_link_ether, PF_INET, inet, CTLFLAG_RW, 0, "");

Modified: head/sys/netinet/igmp.c
==============================================================================
--- head/sys/netinet/igmp.c     Mon Mar  9 17:42:34 2009        (r189591)
+++ head/sys/netinet/igmp.c     Mon Mar  9 17:53:05 2009        (r189592)
@@ -1,4 +1,5 @@
 /*-
+ * Copyright (c) 2007-2009 Bruce Simpson.
  * Copyright (c) 1988 Stephen Deering.
  * Copyright (c) 1992, 1993
  *     The Regents of the University of California.  All rights reserved.
@@ -35,11 +36,13 @@
 
 /*
  * Internet Group Management Protocol (IGMP) routines.
+ * [RFC1112, RFC2236, RFC3376]
  *
  * Written by Steve Deering, Stanford, May 1988.
  * Modified by Rosen Sharma, Stanford, Aug 1994.
  * Modified by Bill Fenner, Xerox PARC, Feb 1995.
  * Modified to fully comply to IGMPv2 by Bill Fenner, Oct 1995.
+ * Significantly rewritten for IGMPv3, VIMAGE, and SMP by Bruce Simpson.
  *
  * MULTICAST Revision: 3.5.1.4
  */
@@ -52,6 +55,7 @@ __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
+#include <sys/module.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/socket.h>
@@ -59,8 +63,11 @@ __FBSDID("$FreeBSD$");
 #include <sys/kernel.h>
 #include <sys/sysctl.h>
 #include <sys/vimage.h>
+#include <sys/ktr.h>
+#include <sys/condvar.h>
 
 #include <net/if.h>
+#include <net/netisr.h>
 #include <net/route.h>
 #include <net/vnet.h>
 
@@ -78,464 +85,3608 @@ __FBSDID("$FreeBSD$");
 
 #include <security/mac/mac_framework.h>
 
-static MALLOC_DEFINE(M_IGMP, "igmp", "igmp state");
+#ifndef KTR_IGMPV3
+#define KTR_IGMPV3 KTR_SUBSYS
+#endif
+
+static struct igmp_ifinfo *
+               igi_alloc_locked(struct ifnet *);
+static void    igi_delete_locked(const struct ifnet *);
+static void    igmp_dispatch_queue(struct ifqueue *, int, const int);
+static void    igmp_fasttimo_vnet(void);
+static void    igmp_final_leave(struct in_multi *, struct igmp_ifinfo *);
+static int     igmp_handle_state_change(struct in_multi *,
+                   struct igmp_ifinfo *);
+static int     igmp_initial_join(struct in_multi *, struct igmp_ifinfo *);
+static int     igmp_input_v1_query(struct ifnet *, const struct ip *);
+static int     igmp_input_v2_query(struct ifnet *, const struct ip *,
+                   const struct igmp *);
+static int     igmp_input_v3_query(struct ifnet *, const struct ip *,
+                   /*const*/ struct igmpv3 *);
+static int     igmp_input_v3_group_query(struct in_multi *,
+                   struct igmp_ifinfo *, int, /*const*/ struct igmpv3 *);
+static int     igmp_input_v1_report(struct ifnet *, /*const*/ struct ip *,
+                   /*const*/ struct igmp *);
+static int     igmp_input_v2_report(struct ifnet *, /*const*/ struct ip *,
+                   /*const*/ struct igmp *);
+static void    igmp_intr(struct mbuf *);
+static int     igmp_isgroupreported(const struct in_addr);
+static struct mbuf *
+               igmp_ra_alloc(void);
+#ifdef KTR
+static char *  igmp_rec_type_to_str(const int);
+#endif
+static void    igmp_set_version(struct igmp_ifinfo *, const int);
+static void    igmp_slowtimo_vnet(void);
+static void    igmp_sysinit(void);
+static int     igmp_v1v2_queue_report(struct in_multi *, const int);
+static void    igmp_v1v2_process_group_timer(struct in_multi *, const int);
+static void    igmp_v1v2_process_querier_timers(struct igmp_ifinfo *);
+static void    igmp_v2_update_group(struct in_multi *, const int);
+static void    igmp_v3_cancel_link_timers(struct igmp_ifinfo *);
+static void    igmp_v3_dispatch_general_query(struct igmp_ifinfo *);
+static struct mbuf *
+               igmp_v3_encap_report(struct ifnet *, struct mbuf *);
+static int     igmp_v3_enqueue_group_record(struct ifqueue *,
+                   struct in_multi *, const int, const int, const int);
+static int     igmp_v3_enqueue_filter_change(struct ifqueue *,
+                   struct in_multi *);
+static void    igmp_v3_process_group_timers(struct igmp_ifinfo *,
+                   struct ifqueue *, struct ifqueue *, struct in_multi *,
+                   const int);
+static int     igmp_v3_merge_state_changes(struct in_multi *,
+                   struct ifqueue *);
+static void    igmp_v3_suppress_group_record(struct in_multi *);
+static int     sysctl_igmp_default_version(SYSCTL_HANDLER_ARGS);
+static int     sysctl_igmp_gsr(SYSCTL_HANDLER_ARGS);
+static int     sysctl_igmp_ifinfo(SYSCTL_HANDLER_ARGS);
+
+#ifdef VIMAGE
+static vnet_attach_fn  vnet_igmp_iattach;
+static vnet_detach_fn  vnet_igmp_idetach;
+#else
+static int     vnet_igmp_iattach(const void *);
+static int     vnet_igmp_idetach(const void *);
+#endif /* VIMAGE */
+
+/*
+ * System-wide globals.
+ *
+ * Unlocked access to these is OK, except for the global IGMP output
+ * queue. The IGMP subsystem lock ends up being system-wide for the moment,
+ * because all VIMAGEs have to share a global output queue, as netisrs
+ * themselves are not virtualized.
+ *
+ * Locking:
+ *  * The permitted lock order is: IN_MULTI_LOCK, IGMP_LOCK, IF_ADDR_LOCK.
+ *    Any may be taken independently; if any are held at the same
+ *    time, the above lock order must be followed.
+ *  * All output is delegated to the netisr to handle IFF_NEEDSGIANT.
+ *    Most of the time, direct dispatch will be fine.
+ *  * IN_MULTI_LOCK covers in_multi.
+ *  * IGMP_LOCK covers igmp_ifinfo and any global variables in this file,
+ *    including the output queue.
+ *  * IF_ADDR_LOCK covers if_multiaddrs, which is used for a variety of
+ *    per-link state iterators.
+ *  * igmp_ifinfo is valid as long as PF_INET is attached to the interface,
+ *    therefore it is not refcounted.
+ *    We allow unlocked reads of igmp_ifinfo when accessed via in_multi.
+ *
+ * Reference counting
+ *  * IGMP acquires its own reference every time an in_multi is passed to
+ *    it and the group is being joined for the first time.
+ *  * IGMP releases its reference(s) on in_multi in a deferred way,
+ *    because the operations which process the release run as part of
+ *    a loop whose control variables are directly affected by the release
+ *    (that, and not recursing on the IF_ADDR_LOCK).
+ *
+ * VIMAGE: Each in_multi corresponds to an ifp, and each ifp corresponds
+ * to a vnet in ifp->if_vnet.
+ *
+ */
+struct mtx              igmp_mtx;
+int                     mpsafe_igmp = 0;
+SYSCTL_INT(_debug, OID_AUTO, mpsafe_igmp, CTLFLAG_RDTUN, &mpsafe_igmp, 0,
+    "Enable SMP-safe IGMPv3");
+
+struct mbuf            *m_raopt;                /* Router Alert option */
+MALLOC_DEFINE(M_IGMP, "igmp", "igmp state");
+
+/*
+ * Global netisr output queue.
+ * This is only used as a last resort if we cannot directly dispatch.
+ * As IN_MULTI_LOCK is no longer in the bottom half of IP, we can do
+ * this, providing mpsafe_igmp is set. If it is not, we take Giant,
+ * and queueing is forced.
+ */
+struct ifqueue          igmpoq;
+
+/*
+ * VIMAGE-wide globals.
+ *
+ * The IGMPv3 timers themselves need to run per-image; however,
+ * protosw timers run globally (see tcp).
+ * An ifnet can only be in one vimage at a time, and the loopback
+ * ifnet, loif, is itself virtualized.
+ * It would otherwise be possible to seriously hose IGMP state,
+ * and create inconsistencies in upstream multicast routing, if you have
+ * multiple VIMAGEs running on the same link joining different multicast
+ * groups, UNLESS the "primary IP address" is different. This is because
+ * IGMP for IPv4 does not force link-local addresses to be used for each
+ * node, unlike MLD for IPv6.
+ * Obviously the IGMPv3 per-interface state has per-vimage granularity
+ * also as a result.
+ *
+ * FUTURE: Stop using IFP_TO_IA/INADDR_ANY, and use source address selection
+ * policy to control the address used by IGMP on the link.
+ */
+#ifdef VIMAGE_GLOBALS
+int     interface_timers_running;       /* IGMPv3 general query response */
+int     state_change_timers_running;    /* IGMPv3 state-change retransmit */
+int     current_state_timers_running;   /* IGMPv1/v2 host report;
+                                         * IGMPv3 g/sg query response */
+
+LIST_HEAD(, igmp_ifinfo)        igi_head;
+struct igmpstat                         igmpstat;
+struct timeval                  igmp_gsrdelay;
+
+int     igmp_recvifkludge;
+int     igmp_sendra;
+int     igmp_sendlocal;
+int     igmp_v1enable;
+int     igmp_v2enable;
+int     igmp_legacysupp;
+int     igmp_default_version;
+#endif /* VIMAGE_GLOBALS */
+
+/*
+ * Virtualized sysctls.
+ */
+SYSCTL_V_STRUCT(V_NET, vnet_inet, _net_inet_igmp, IGMPCTL_STATS, stats,
+    CTLFLAG_RW, igmpstat, igmpstat, "");
+SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_igmp, OID_AUTO, recvifkludge,
+    CTLFLAG_RW, igmp_recvifkludge, 0,
+    "Rewrite IGMPv1/v2 reports from 0.0.0.0 to contain subnet address");
+SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_igmp, OID_AUTO, sendra,
+    CTLFLAG_RW, igmp_sendra, 0,
+    "Send IP Router Alert option in IGMPv2/v3 messages");
+SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_igmp, OID_AUTO, sendlocal,
+    CTLFLAG_RW, igmp_sendlocal, 0,
+    "Send IGMP membership reports for 224.0.0.0/24 groups");
+SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_igmp, OID_AUTO, v1enable,
+    CTLFLAG_RW, igmp_v1enable, 0,
+    "Enable backwards compatibility with IGMPv1");
+SYSCTL_V_INT(V_NET, vnet_inet,  _net_inet_igmp, OID_AUTO, v2enable,
+    CTLFLAG_RW, igmp_v2enable, 0,
+    "Enable backwards compatibility with IGMPv2");
+SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_igmp, OID_AUTO, legacysupp,
+    CTLFLAG_RW, igmp_legacysupp, 0,
+    "Allow v1/v2 reports to suppress v3 group responses");
+SYSCTL_V_PROC(V_NET, vnet_inet, _net_inet_igmp, OID_AUTO, default_version,
+    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, igmp_default_version, 0,
+    sysctl_igmp_default_version, "I",
+    "Default version of IGMP to run on each interface");
+SYSCTL_V_PROC(V_NET, vnet_inet, _net_inet_igmp, OID_AUTO, gsrdelay,
+    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, igmp_gsrdelay.tv_sec, 0,
+    sysctl_igmp_gsr, "I",
+    "Rate limit for IGMPv3 Group-and-Source queries in seconds");
+
+/*
+ * Non-virtualized sysctls.
+ */
+SYSCTL_NODE(_net_inet_igmp, OID_AUTO, ifinfo, CTLFLAG_RD | CTLFLAG_MPSAFE,
+    sysctl_igmp_ifinfo, "Per-interface IGMPv3 state");
+
+/*
+ * Record the interface context in the packet header of an IGMP mbuf
+ * chain: the interface index is stored in the flowid field and, on
+ * VIMAGE kernels only, the interface's vnet pointer is stored in the
+ * header field.
+ */
+static __inline void
+igmp_save_context(struct mbuf *m, struct ifnet *ifp)
+{
+
+       m->m_pkthdr.flowid = ifp->if_index;
+#ifdef VIMAGE
+       m->m_pkthdr.header = ifp->if_vnet;
+#endif
+}
+
+/*
+ * Reset the two packet header fields used to carry IGMP context
+ * (flowid and header) back to zero/NULL.
+ */
+static __inline void
+igmp_scrub_context(struct mbuf *m)
+{
+
+       m->m_pkthdr.flowid = 0;
+       m->m_pkthdr.header = NULL;
+}
+
+#ifdef KTR
+/*
+ * Format an IPv4 address supplied in host byte order.
+ * The value is converted to network byte order and passed to inet_ntoa();
+ * the returned pointer is whatever inet_ntoa() hands back.
+ * Only compiled when KTR is defined.
+ */
+static __inline char *
+inet_ntoa_haddr(in_addr_t haddr)
+{
+       struct in_addr naddr = { .s_addr = htonl(haddr) };
+
+       return (inet_ntoa(naddr));
+}
+#endif /* KTR */
+
+/*
+ * Restore context from a queued IGMP output chain.
+ * Return saved ifindex.
+ *
+ * VIMAGE: The assertion is there to make sure that we
+ * actually called CURVNET_SET() with what's in the mbuf chain.
+ * It is currently compiled out (guarded by "notyet"), so as written
+ * this function only returns the flowid field of the packet header.
+ */
+static __inline uint32_t
+igmp_restore_context(struct mbuf *m)
+{
+
+#ifdef notyet
+#if defined(VIMAGE) && defined(INVARIANTS)
+       KASSERT(curvnet == (m->m_pkthdr.header),
+           ("%s: called when curvnet was not restored", __func__));
+#endif
+#endif
+       return (m->m_pkthdr.flowid);
+}
+
+/*
+ * Sysctl handler for the default IGMP version.
+ *
+ * A read reports the current default. A write is accepted only when the
+ * new value lies in IGMP_VERSION_1..IGMP_VERSION_3; any other value
+ * fails with EINVAL and leaves the setting unchanged.
+ *
+ * VIMAGE: Assume curvnet set by caller.
+ * SMPng: NOTE: Serialized by IGMP lock.
+ */
+static int
+sysctl_igmp_default_version(SYSCTL_HANDLER_ARGS)
+{
+       int      error;
+       int      version;
+
+       error = sysctl_wire_old_buffer(req, sizeof(int));
+       if (error != 0)
+               return (error);
+
+       IGMP_LOCK();
+
+       /* Work on a copy so a rejected write cannot alter the setting. */
+       version = V_igmp_default_version;
+       error = sysctl_handle_int(oidp, &version, 0, req);
+       if (error == 0 && req->newptr != NULL) {
+               if (version < IGMP_VERSION_1 || version > IGMP_VERSION_3) {
+                       error = EINVAL;
+               } else {
+                       CTR2(KTR_IGMPV3,
+                           "change igmp_default_version from %d to %d",
+                           V_igmp_default_version, version);
+                       V_igmp_default_version = version;
+               }
+       }
+
+       IGMP_UNLOCK();
+       return (error);
+}
+
+/*
+ * Sysctl handler for the threshold between group-source queries,
+ * expressed in seconds.
+ *
+ * A read reports the current tv_sec value. A write is accepted only for
+ * values from -1 up to and including 59; anything else fails with EINVAL
+ * and leaves the setting unchanged.
+ *
+ * VIMAGE: Assume curvnet set by caller.
+ * SMPng: NOTE: Serialized by IGMP lock.
+ */
+static int
+sysctl_igmp_gsr(SYSCTL_HANDLER_ARGS)
+{
+       int error;
+       int secs;
+
+       error = sysctl_wire_old_buffer(req, sizeof(int));
+       if (error != 0)
+               return (error);
+
+       IGMP_LOCK();
+
+       /* Work on a copy so a rejected write cannot alter the setting. */
+       secs = V_igmp_gsrdelay.tv_sec;
+       error = sysctl_handle_int(oidp, &secs, 0, req);
+       if (error == 0 && req->newptr != NULL) {
+               if (secs < -1 || secs >= 60) {
+                       error = EINVAL;
+               } else {
+                       CTR2(KTR_IGMPV3, "change igmp_gsrdelay from %d to %d",
+                            V_igmp_gsrdelay.tv_sec, secs);
+                       V_igmp_gsrdelay.tv_sec = secs;
+               }
+       }
+
+       IGMP_UNLOCK();
+       return (error);
+}
+
+/*
+ * Expose struct igmp_ifinfo to userland, keyed by ifindex.
+ * For use by ifmcstat(8).
+ *
+ * The node is read-only: any attempt to write fails with EPERM.
+ * The OID name must carry exactly one component (the ifindex), otherwise
+ * EINVAL is returned. ENOENT is returned when the index is out of range,
+ * when no ifnet exists for it, or when no igmp_ifinfo on the list
+ * belongs to that ifnet. On a match the structure is copied out with
+ * SYSCTL_OUT() and its result is returned.
+ * The lookup and copy-out run with IN_MULTI_LOCK and IGMP_LOCK held.
+ *
+ * SMPng: NOTE: Does an unlocked ifindex space read.
+ * VIMAGE: Assume curvnet set by caller. The node handler itself
+ * is not directly virtualized.
+ */
+static int
+sysctl_igmp_ifinfo(SYSCTL_HANDLER_ARGS)
+{
+       INIT_VNET_NET(curvnet);
+       int                     *name;
+       int                      error;
+       u_int                    namelen;
+       struct ifnet            *ifp;
+       struct igmp_ifinfo      *igi;
+
+       name = (int *)arg1;
+       namelen = arg2;
+
+       if (req->newptr != NULL)
+               return (EPERM);
+
+       if (namelen != 1)
+               return (EINVAL);
+
+       error = sysctl_wire_old_buffer(req, sizeof(struct igmp_ifinfo));
+       if (error)
+               return (error);
+
+       IN_MULTI_LOCK();
+       IGMP_LOCK();
+
+       if (name[0] <= 0 || name[0] > V_if_index) {
+               error = ENOENT;
+               goto out_locked;
+       }
+
+       error = ENOENT;
+
+       ifp = ifnet_byindex(name[0]);
+       if (ifp == NULL)
+               goto out_locked;
+
+       LIST_FOREACH(igi, &V_igi_head, igi_link) {
+               if (ifp == igi->igi_ifp) {
+                       error = SYSCTL_OUT(req, igi,
+                           sizeof(struct igmp_ifinfo));
+                       break;
+               }
+       }
+
+out_locked:
+       IGMP_UNLOCK();
+       IN_MULTI_UNLOCK();
+       return (error);
+}
+
+/*
+ * Dispatch an entire queue of pending packet chains
+ * using the netisr.
+ *
+ * At most 'limit' chains are dispatched per call. Because the counter is
+ * decremented before it is compared with zero, a limit of zero or less
+ * never ends the loop early, so the whole queue is drained.
+ * If 'loop' is non-zero, M_IGMP_LOOP is set on each chain before it is
+ * handed to the netisr.
+ *
+ * VIMAGE: Assumes the vnet pointer has been set.
+ */
+static void
+igmp_dispatch_queue(struct ifqueue *ifq, int limit, const int loop)
+{
+       struct mbuf *m;
+
+       for (;;) {
+               _IF_DEQUEUE(ifq, m);
+               if (m == NULL)
+                       break;
+               /* Trace the chain first, then the queue it came from. */
+               CTR3(KTR_IGMPV3, "%s: dispatch %p from %p", __func__, m, ifq);
+               if (loop)
+                       m->m_flags |= M_IGMP_LOOP;
+               netisr_dispatch(NETISR_IGMP, m);
+               if (--limit == 0)
+                       break;
+       }
+}
+
+/*
+ * Decide whether IGMP reports may be sent for a given group.
+ *
+ * Two classes of group are never reported:
+ *  - ALL-HOSTS (224.0.0.1), unconditionally;
+ *  - any group in the 224.0.0.0/24 link-local scope while the
+ *    net.inet.igmp.sendlocal sysctl is 0. Note that turning this off
+ *    may break certain IGMP snooping switches which rely on the old
+ *    report behaviour.
+ *
+ * Return zero if reports for the group must be suppressed, or
+ * non-zero if reports should be issued.
+ */
+static __inline int
+igmp_isgroupreported(const struct in_addr addr)
+{
+
+       if (in_allhosts(addr))
+               return (0);
+
+       if (!V_igmp_sendlocal && IN_LOCAL_GROUP(ntohl(addr.s_addr)))
+               return (0);
+
+       return (1);
+}
+
+/*
+ * Construct a Router Alert option to use in outgoing packets.
+ *
+ * The option is built in a freshly allocated mbuf as a struct ipoption:
+ * ipopt_dst is INADDR_ANY, followed by the four option bytes
+ * (IPOPT_RA, length 4, IPOPT_EOL, pad). m_len covers ipopt_dst plus
+ * those four bytes.
+ *
+ * Returns the mbuf, or NULL if no mbuf could be allocated; the
+ * allocation is made with M_DONTWAIT and may therefore fail.
+ */
+static struct mbuf *
+igmp_ra_alloc(void)
+{
+       struct mbuf     *m;
+       struct ipoption *p;
+
+       MGET(m, M_DONTWAIT, MT_DATA);
+       if (m == NULL)
+               return (NULL);
+
+       p = mtod(m, struct ipoption *);
+       p->ipopt_dst.s_addr = INADDR_ANY;
+       p->ipopt_list[0] = IPOPT_RA;    /* Router Alert Option */
+       p->ipopt_list[1] = 0x04;        /* 4 bytes long */
+       p->ipopt_list[2] = IPOPT_EOL;   /* End of IP option list */
+       p->ipopt_list[3] = 0x00;        /* pad byte */
+       m->m_len = sizeof(p->ipopt_dst) + p->ipopt_list[1];
+
+       return (m);
+}
+
+/*
+ * Attach IGMP when PF_INET is attached to an interface.
+ *
+ * Allocates the per-interface igmp_ifinfo under the IGMP lock and marks
+ * it IGIF_SILENT when the interface does not have IFF_MULTICAST set.
+ *
+ * Returns the new igmp_ifinfo, or NULL if it could not be allocated
+ * (the underlying allocation does not sleep and may fail).
+ *
+ * VIMAGE: Currently we set the vnet pointer, although it is
+ * likely that it was already set by our caller.
+ */
+struct igmp_ifinfo *
+igmp_domifattach(struct ifnet *ifp)
+{
+       struct igmp_ifinfo *igi;
+
+       CTR3(KTR_IGMPV3, "%s: called for ifp %p(%s)",
+           __func__, ifp, ifp->if_xname);
+
+       CURVNET_SET(ifp->if_vnet);
+       IGMP_LOCK();
+
+       igi = igi_alloc_locked(ifp);
+       /* Only touch the flags if the allocation actually succeeded. */
+       if (igi != NULL && !(ifp->if_flags & IFF_MULTICAST))
+               igi->igi_flags |= IGIF_SILENT;
+
+       IGMP_UNLOCK();
+       CURVNET_RESTORE();
+
+       return (igi);
+}
+
+/*
+ * Allocate and initialise the per-interface IGMP state for ifp and
+ * link it onto the igmp_ifinfo list. The IGMP lock must be held.
+ *
+ * Returns the new igmp_ifinfo, or NULL if the non-sleeping allocation
+ * failed; in that case nothing is inserted on the list.
+ *
+ * VIMAGE: assume curvnet set by caller.
+ */
+static struct igmp_ifinfo *
+igi_alloc_locked(/*const*/ struct ifnet *ifp)
+{
+       struct igmp_ifinfo *igi;
+
+       IGMP_LOCK_ASSERT();
+
+       igi = malloc(sizeof(struct igmp_ifinfo), M_IGMP, M_NOWAIT|M_ZERO);
+       if (igi == NULL)
+               return (NULL);
+
+       /* Seed the link state with the protocol defaults. */
+       igi->igi_ifp = ifp;
+       igi->igi_version = V_igmp_default_version;
+       igi->igi_flags = 0;
+       igi->igi_rv = IGMP_RV_INIT;
+       igi->igi_qi = IGMP_QI_INIT;
+       igi->igi_qri = IGMP_QRI_INIT;
+       igi->igi_uri = IGMP_URI_INIT;
+
+       SLIST_INIT(&igi->igi_relinmhead);
+
+       /* Responses to general queries are subject to bounds. */
+       IFQ_SET_MAXLEN(&igi->igi_gq, IGMP_MAX_RESPONSE_PACKETS);
+
+       LIST_INSERT_HEAD(&V_igi_head, igi, igi_link);
+
+       CTR2(KTR_IGMPV3, "allocate igmp_ifinfo for ifp %p(%s)",
+            ifp, ifp->if_xname);
+
+       return (igi);
+}
+
+/*
+ * Hook for ifdetach.
+ *
+ * NOTE: Some finalization tasks need to run before the protocol domain
+ * is detached, but also before the link layer does its cleanup.
+ *
+ * When the link is running IGMPv3, every AF_INET membership on the
+ * interface is visited with IF_ADDR_LOCK held: groups in the
+ * IGMP_LEAVING_MEMBER state are queued on igi_relinmhead, and
+ * inm_clear_recorded() is called on each group. The queued groups are
+ * released with inm_release_locked() only after IF_ADDR_LOCK has been
+ * dropped. Links running other IGMP versions skip this step.
+ *
+ * SMPNG: igmp_ifdetach() needs to take IF_ADDR_LOCK().
+ *
+ * VIMAGE: curvnet should have been set by caller, but let's not assume
+ * that for now.
+ */
+void
+igmp_ifdetach(struct ifnet *ifp)
+{
+       struct igmp_ifinfo      *igi;
+       struct ifmultiaddr      *ifma;
+       struct in_multi         *inm, *tinm;
+
+       CTR3(KTR_IGMPV3, "%s: called for ifp %p(%s)", __func__, ifp,
+           ifp->if_xname);
+
+       CURVNET_SET(ifp->if_vnet);
+
+       IGMP_LOCK();
+
+       igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp;
+       if (igi->igi_version == IGMP_VERSION_3) {
+               IF_ADDR_LOCK(ifp);
+               TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
+                       if (ifma->ifma_addr->sa_family != AF_INET)
+                               continue;
+                       inm = (struct in_multi *)ifma->ifma_protospec;
+                       if (inm->inm_state == IGMP_LEAVING_MEMBER) {
+                               SLIST_INSERT_HEAD(&igi->igi_relinmhead,
+                                   inm, inm_nrele);
+                       }
+                       inm_clear_recorded(inm);
+               }
+               IF_ADDR_UNLOCK(ifp);
+               /*
+                * Free the in_multi reference(s) for this IGMP lifecycle.
+                */
+               SLIST_FOREACH_SAFE(inm, &igi->igi_relinmhead, inm_nrele,
+                   tinm) {
+                       SLIST_REMOVE_HEAD(&igi->igi_relinmhead, inm_nrele);
+                       inm_release_locked(inm);
+               }
+       }
+
+       IGMP_UNLOCK();
+
+#ifdef VIMAGE
+       /*
+        * Plug the potential race which may occur when a VIMAGE
+        * is detached and we are forced to queue pending IGMP output for
+        * output netisr processing due to !mpsafe_igmp. In this case it
+        * is possible that igmp_intr() is about to see mbuf chains with
+        * invalid cached curvnet pointers.
+        * This is a rare condition, so just blow them all away.
+        * FUTURE: This may in fact not be needed, because IFF_NEEDSGIANT
+        * is being removed in 8.x and the netisr may then be eliminated;
+        * it is needed only if VIMAGE and IFF_NEEDSGIANT need to co-exist.
+        */
+       if (!mpsafe_igmp) {
+               int drops;
+
+               IF_LOCK(&igmpoq);
+               drops = igmpoq.ifq_len;
+               _IF_DRAIN(&igmpoq);
+               IF_UNLOCK(&igmpoq);
+               if (bootverbose && drops) {
+                       printf("%s: dropped %d pending IGMP output packets\n",
+                           __func__, drops);
+               }
+       }
+#endif /* VIMAGE */
+
+       CURVNET_RESTORE();
+}
+
+/*
+ * Hook for domifdetach.
+ *
+ * Removes and frees the igmp_ifinfo belonging to ifp, under the IGMP
+ * lock. The lookup is done by igi_delete_locked(), which searches the
+ * igmp_ifinfo list for the entry whose igi_ifp matches ifp.
+ *
+ * VIMAGE: curvnet should have been set by caller, but let's not assume
+ * that for now.
+ */
+void
+igmp_domifdetach(struct ifnet *ifp)
+{
+
+       CTR3(KTR_IGMPV3, "%s: called for ifp %p(%s)",
+           __func__, ifp, ifp->if_xname);
+
+       CURVNET_SET(ifp->if_vnet);
+       IGMP_LOCK();
+
+       igi_delete_locked(ifp);
+
+       IGMP_UNLOCK();
+       CURVNET_RESTORE();
+}
+
+/*
+ * Find the igmp_ifinfo attached to ifp, unlink it from the list and
+ * free it, discarding any deferred General Query responses first.
+ * The IGMP lock must be held.
+ *
+ * With INVARIANTS, failing to find an entry for ifp is a panic;
+ * otherwise the function returns silently.
+ */
+static void
+igi_delete_locked(const struct ifnet *ifp)
+{
+       struct igmp_ifinfo *igi, *next;
+
+       CTR3(KTR_IGMPV3, "%s: freeing igmp_ifinfo for ifp %p(%s)",
+           __func__, ifp, ifp->if_xname);
+
+       IGMP_LOCK_ASSERT();
+
+       LIST_FOREACH_SAFE(igi, &V_igi_head, igi_link, next) {
+               if (igi->igi_ifp != ifp)
+                       continue;
+
+               /* Free deferred General Query responses. */
+               _IF_DRAIN(&igi->igi_gq);
+
+               LIST_REMOVE(igi, igi_link);
+
+               KASSERT(SLIST_EMPTY(&igi->igi_relinmhead),
+                   ("%s: there are dangling in_multi references",
+                   __func__));
+
+               free(igi, M_IGMP);
+               return;
+       }
+
+#ifdef INVARIANTS
+       panic("%s: igmp_ifinfo not found for ifp %p\n", __func__, ifp);
+#endif
+}
+
+/*
+ * Process a received IGMPv1 query.
+ * Return non-zero if the message should be dropped.
+ * As written, every return path in this function returns 0.
+ *
+ * VIMAGE: The curvnet pointer is derived from the input ifp.
+ */
+static int
+igmp_input_v1_query(struct ifnet *ifp, const struct ip *ip)
+{
+       INIT_VNET_INET(ifp->if_vnet);
+       struct ifmultiaddr      *ifma;
+       struct igmp_ifinfo      *igi;
+       struct in_multi         *inm;
+
+       /*
+        * IGMPv1 General Queries SHOULD always be addressed to 224.0.0.1.
+        * igmp_group is always ignored. Do not drop it as a userland
+        * daemon may wish to see it.
+        */
+       if (!in_allhosts(ip->ip_dst)) {
+               ++V_igmpstat.igps_rcv_badqueries;
+               return (0);
+       }
+
+       ++V_igmpstat.igps_rcv_gen_queries;
+
+       /*
+        * Switch to IGMPv1 host compatibility mode.
+        */
+       IN_MULTI_LOCK();
+       IGMP_LOCK();
+
+       igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp;
+       KASSERT(igi != NULL, ("%s: no igmp_ifinfo for ifp %p", __func__, ifp));
+
+       if (igi->igi_flags & IGIF_LOOPBACK) {
+               CTR2(KTR_IGMPV3, "ignore v1 query on IGIF_LOOPBACK ifp %p(%s)",
+                   ifp, ifp->if_xname);
+               goto out_locked;
+       }
+
+       igmp_set_version(igi, IGMP_VERSION_1);
+
+       CTR2(KTR_IGMPV3, "process v1 query on ifp %p(%s)", ifp, ifp->if_xname);
+
+       /*
+        * Start the timers in all of our group records
+        * for the interface on which the query arrived,
+        * except those which are already running.
+        */
+       IF_ADDR_LOCK(ifp);
+       TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
+               if (ifma->ifma_addr->sa_family != AF_INET)
+                       continue;
+               inm = (struct in_multi *)ifma->ifma_protospec;
+               if (inm->inm_timer != 0)
+                       continue;
+               switch (inm->inm_state) {
+               case IGMP_NOT_MEMBER:
+               case IGMP_SILENT_MEMBER:
+                       break;
+               case IGMP_G_QUERY_PENDING_MEMBER:
+               case IGMP_SG_QUERY_PENDING_MEMBER:
+               case IGMP_REPORTING_MEMBER:
+               case IGMP_IDLE_MEMBER:
+               case IGMP_LAZY_MEMBER:
+               case IGMP_SLEEPING_MEMBER:
+               case IGMP_AWAKENING_MEMBER:
+                       inm->inm_state = IGMP_REPORTING_MEMBER;
+                       inm->inm_timer = IGMP_RANDOM_DELAY(
+                           IGMP_V1V2_MAX_RI * PR_FASTHZ);
+                       V_current_state_timers_running = 1;
+                       break;
+               case IGMP_LEAVING_MEMBER:
+                       break;
+               }
+       }
+       IF_ADDR_UNLOCK(ifp);
+
+out_locked:
+       IGMP_UNLOCK();
+       IN_MULTI_UNLOCK();
+
+       return (0);
+}
+
+/*
+ * Process a received IGMPv2 general or group-specific query.
+ *
+ * ifp is the interface whose IGMP state and group memberships are
+ * consulted, ip is the IP header (only ip_dst is examined) and igmp is
+ * the IGMP header (igmp_code and igmp_group are examined).
+ *
+ * A non-null igmp_group selects the group-specific path, which updates
+ * at most the one matching group; a null igmp_group selects the general
+ * path, which updates every AF_INET group on ifp provided the query was
+ * addressed to the all-hosts group.
+ *
+ * Always returns 0.
+ *
+ * NOTE(review): unlike igmp_input_v1_query(), this function uses
+ * V_igmpstat without an INIT_VNET_INET() declaration -- confirm that
+ * this is intended for VIMAGE builds.
+ */
+static int
+igmp_input_v2_query(struct ifnet *ifp, const struct ip *ip,
+    const struct igmp *igmp)
+{
+       struct ifmultiaddr      *ifma;
+       struct igmp_ifinfo      *igi;
+       struct in_multi         *inm;
+       uint16_t                 timer;
+
+       /*
+        * Look up the IGMP link info for this interface and switch to
+        * IGMPv2 host compatibility mode. No allocation happens here:
+        * the link info is asserted to exist already.
+        *
+        * Both the IN_MULTI lock and the IGMP lock are held from here
+        * until the out_locked label.
+        */
+       IN_MULTI_LOCK();
+       IGMP_LOCK();
+
+       igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp;
+       KASSERT(igi != NULL, ("%s: no igmp_ifinfo for ifp %p", __func__, ifp));
+
+       if (igi->igi_flags & IGIF_LOOPBACK) {
+               CTR2(KTR_IGMPV3, "ignore v2 query on IGIF_LOOPBACK ifp %p(%s)",
+                   ifp, ifp->if_xname);
+               goto out_locked;        /* version and counters untouched */
+       }
+
+       igmp_set_version(igi, IGMP_VERSION_2);
+
+       timer = igmp->igmp_code * PR_FASTHZ / IGMP_TIMER_SCALE;
+       if (timer == 0)                 /* clamp to a minimum of 1 */
+               timer = 1;
+
+       if (!in_nullhost(igmp->igmp_group)) {
+               /*
+                * IGMPv2 Group-Specific Query.
+                * If this is a group-specific IGMPv2 query, we need only
+                * look up the single group to process it.
+                *
+                * The group-query counter is incremented whether or not
+                * the group was found on this interface.
+                */
+               inm = inm_lookup(ifp, igmp->igmp_group);
+               if (inm != NULL) {
+                       CTR3(KTR_IGMPV3, "process v2 query %s on ifp %p(%s)",
+                           inet_ntoa(igmp->igmp_group), ifp, ifp->if_xname);
+                       igmp_v2_update_group(inm, timer);
+               }
+               ++V_igmpstat.igps_rcv_group_queries;
+       } else {
+               /*
+                * IGMPv2 General Query.
+                * If this was not sent to the all-hosts group, ignore it.
+                *
+                * The general-query counter is incremented even when the
+                * query is ignored for that reason.
+                */
+               if (in_allhosts(ip->ip_dst)) {
+                       /*
+                        * For each reporting group joined on this
+                        * interface, kick the report timer.
+                        *
+                        * The interface address lock is held while the
+                        * multicast address list is walked; non-AF_INET
+                        * entries are skipped.
+                        */
+                       CTR2(KTR_IGMPV3,
+                           "process v2 general query on ifp %p(%s)",
+                           ifp, ifp->if_xname);
+
+                       IF_ADDR_LOCK(ifp);
+                       TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
+                               if (ifma->ifma_addr->sa_family != AF_INET)
+                                       continue;
+                               inm = (struct in_multi *)ifma->ifma_protospec;
+                               igmp_v2_update_group(inm, timer);
+                       }
+                       IF_ADDR_UNLOCK(ifp);
+               }
+               ++V_igmpstat.igps_rcv_gen_queries;
+       }
+
+out_locked:    /* release in the reverse order of acquisition */
+       IGMP_UNLOCK();
+       IN_MULTI_UNLOCK();
+
+       return (0);
+}
+
+/*
+ * Update the report timer on a group in response to an IGMPv2 query.
+ *
+ * If we are becoming the reporting member for this group, start the timer.
+ * If we already are the reporting member for this group, and timer is
+ * below the threshold, reset it.
+ *

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
_______________________________________________
svn-src-all@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to