Author: ae
Date: Tue Jun  5 21:24:59 2018
New Revision: 334673
URL: https://svnweb.freebsd.org/changeset/base/334673

Log:
  Rework if_gif(4) to use the new encap_lookup_t method to speed up lookup
  of the needed interface when many gif interfaces are present.
  
  Remove rmlock from gif_softc, use epoch(9) and CK_LIST instead.
  Move more AF-related code into AF-related locations.
  Use a hash table to speed up lookup of the needed softc. Interfaces
  with the GIF_IGNORE_SOURCE flag are stored in a plain CK_LIST.
  The sysctl net.link.gif.parallel_tunnels is removed. Its removal was planned
  16 years ago, and it actually could work only for the outbound direction.
  Each protocol that can be handled by an if_gif(4) interface is registered
  with its own separate encap handler; this helps avoid invoking the handler
  for unrelated protocols (GRE, PIM, etc.).
  
  This change dramatically improves performance when many gif(4)
  interfaces are used.
  
  Sponsored by: Yandex LLC

Modified:
  head/share/man/man4/gif.4
  head/sys/net/if_gif.c
  head/sys/net/if_gif.h
  head/sys/netinet/in_gif.c
  head/sys/netinet6/in6_gif.c

Modified: head/share/man/man4/gif.4
==============================================================================
--- head/share/man/man4/gif.4   Tue Jun  5 20:54:29 2018        (r334672)
+++ head/share/man/man4/gif.4   Tue Jun  5 21:24:59 2018        (r334673)
@@ -29,7 +29,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd September 10, 2015
+.Dd June 5, 2018
 .Dt GIF 4
 .Os
 .Sh NAME
@@ -169,14 +169,6 @@ This behavior may be modified at runtime by setting th
 variable
 .Va net.link.gif.max_nesting
 to the desired level of nesting.
-Additionally,
-.Nm
-tunnels are restricted to one per pair of end points.
-Parallel tunnels may be enabled by setting the
-.Xr sysctl 8
-variable
-.Va net.link.gif.parallel_tunnels
-to 1.
 .Sh SEE ALSO
 .Xr gre 4 ,
 .Xr inet 4 ,

Modified: head/sys/net/if_gif.c
==============================================================================
--- head/sys/net/if_gif.c       Tue Jun  5 20:54:29 2018        (r334672)
+++ head/sys/net/if_gif.c       Tue Jun  5 21:24:59 2018        (r334673)
@@ -2,6 +2,7 @@
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
+ * Copyright (c) 2018 Andrey V. Elsukov <a...@freebsd.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -39,7 +40,6 @@ __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
-#include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
@@ -55,7 +55,6 @@ __FBSDID("$FreeBSD$");
 #include <sys/syslog.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
-#include <sys/protosw.h>
 #include <sys/conf.h>
 #include <machine/cpu.h>
 
@@ -85,8 +84,6 @@ __FBSDID("$FreeBSD$");
 #include <netinet/ip6.h>
 #include <netinet6/ip6_ecn.h>
 #include <netinet6/ip6_var.h>
-#include <netinet6/scope6_var.h>
-#include <netinet6/ip6protosw.h>
 #endif /* INET6 */
 
 #include <netinet/ip_encap.h>
@@ -98,32 +95,17 @@ __FBSDID("$FreeBSD$");
 
 static const char gifname[] = "gif";
 
-/*
- * gif_mtx protects a per-vnet gif_softc_list.
- */
-static VNET_DEFINE(struct mtx, gif_mtx);
-#define        V_gif_mtx               VNET(gif_mtx)
-static MALLOC_DEFINE(M_GIF, "gif", "Generic Tunnel Interface");
-static VNET_DEFINE(LIST_HEAD(, gif_softc), gif_softc_list);
-#define        V_gif_softc_list        VNET(gif_softc_list)
+MALLOC_DEFINE(M_GIF, "gif", "Generic Tunnel Interface");
 static struct sx gif_ioctl_sx;
 SX_SYSINIT(gif_ioctl_sx, &gif_ioctl_sx, "gif_ioctl");
 
-#define        GIF_LIST_LOCK_INIT(x)           mtx_init(&V_gif_mtx, "gif_mtx", \
-                                           NULL, MTX_DEF)
-#define        GIF_LIST_LOCK_DESTROY(x)        mtx_destroy(&V_gif_mtx)
-#define        GIF_LIST_LOCK(x)                mtx_lock(&V_gif_mtx)
-#define        GIF_LIST_UNLOCK(x)              mtx_unlock(&V_gif_mtx)
-
 void   (*ng_gif_input_p)(struct ifnet *ifp, struct mbuf **mp, int af);
 void   (*ng_gif_input_orphan_p)(struct ifnet *ifp, struct mbuf *m, int af);
 void   (*ng_gif_attach_p)(struct ifnet *ifp);
 void   (*ng_gif_detach_p)(struct ifnet *ifp);
 
 static int     gif_check_nesting(struct ifnet *, struct mbuf *);
-static int     gif_set_tunnel(struct ifnet *, struct sockaddr *,
-    struct sockaddr *);
-static void    gif_delete_tunnel(struct ifnet *);
+static void    gif_delete_tunnel(struct gif_softc *);
 static int     gif_ioctl(struct ifnet *, u_long, caddr_t);
 static int     gif_transmit(struct ifnet *, struct mbuf *);
 static void    gif_qflush(struct ifnet *);
@@ -132,8 +114,6 @@ static void gif_clone_destroy(struct ifnet *);
 static VNET_DEFINE(struct if_clone *, gif_cloner);
 #define        V_gif_cloner    VNET(gif_cloner)
 
-static int gifmodevent(module_t, int, void *);
-
 SYSCTL_DECL(_net_link);
 static SYSCTL_NODE(_net_link, IFT_GIF, gif, CTLFLAG_RW, 0,
     "Generic Tunnel Interface");
@@ -153,21 +133,6 @@ static VNET_DEFINE(int, max_gif_nesting) = MAX_GIF_NES
 SYSCTL_INT(_net_link_gif, OID_AUTO, max_nesting, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(max_gif_nesting), 0, "Max nested tunnels");
 
-/*
- * By default, we disallow creation of multiple tunnels between the same
- * pair of addresses.  Some applications require this functionality so
- * we allow control over this check here.
- */
-#ifdef XBONEHACK
-static VNET_DEFINE(int, parallel_tunnels) = 1;
-#else
-static VNET_DEFINE(int, parallel_tunnels) = 0;
-#endif
-#define        V_parallel_tunnels      VNET(parallel_tunnels)
-SYSCTL_INT(_net_link_gif, OID_AUTO, parallel_tunnels,
-    CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(parallel_tunnels), 0,
-    "Allow parallel tunnels?");
-
 static int
 gif_clone_create(struct if_clone *ifc, int unit, caddr_t params)
 {
@@ -176,20 +141,15 @@ gif_clone_create(struct if_clone *ifc, int unit, caddr
        sc = malloc(sizeof(struct gif_softc), M_GIF, M_WAITOK | M_ZERO);
        sc->gif_fibnum = curthread->td_proc->p_fibnum;
        GIF2IFP(sc) = if_alloc(IFT_GIF);
-       GIF_LOCK_INIT(sc);
        GIF2IFP(sc)->if_softc = sc;
        if_initname(GIF2IFP(sc), gifname, unit);
 
        GIF2IFP(sc)->if_addrlen = 0;
        GIF2IFP(sc)->if_mtu    = GIF_MTU;
        GIF2IFP(sc)->if_flags  = IFF_POINTOPOINT | IFF_MULTICAST;
-#if 0
-       /* turn off ingress filter */
-       GIF2IFP(sc)->if_flags  |= IFF_LINK2;
-#endif
        GIF2IFP(sc)->if_ioctl  = gif_ioctl;
-       GIF2IFP(sc)->if_transmit  = gif_transmit;
-       GIF2IFP(sc)->if_qflush  = gif_qflush;
+       GIF2IFP(sc)->if_transmit = gif_transmit;
+       GIF2IFP(sc)->if_qflush = gif_qflush;
        GIF2IFP(sc)->if_output = gif_output;
        GIF2IFP(sc)->if_capabilities |= IFCAP_LINKSTATE;
        GIF2IFP(sc)->if_capenable |= IFCAP_LINKSTATE;
@@ -198,9 +158,6 @@ gif_clone_create(struct if_clone *ifc, int unit, caddr
        if (ng_gif_attach_p != NULL)
                (*ng_gif_attach_p)(GIF2IFP(sc));
 
-       GIF_LIST_LOCK();
-       LIST_INSERT_HEAD(&V_gif_softc_list, sc, gif_list);
-       GIF_LIST_UNLOCK();
        return (0);
 }
 
@@ -211,10 +168,7 @@ gif_clone_destroy(struct ifnet *ifp)
 
        sx_xlock(&gif_ioctl_sx);
        sc = ifp->if_softc;
-       gif_delete_tunnel(ifp);
-       GIF_LIST_LOCK();
-       LIST_REMOVE(sc, gif_list);
-       GIF_LIST_UNLOCK();
+       gif_delete_tunnel(sc);
        if (ng_gif_detach_p != NULL)
                (*ng_gif_detach_p)(ifp);
        bpfdetach(ifp);
@@ -222,8 +176,8 @@ gif_clone_destroy(struct ifnet *ifp)
        ifp->if_softc = NULL;
        sx_xunlock(&gif_ioctl_sx);
 
+       GIF_WAIT();
        if_free(ifp);
-       GIF_LOCK_DESTROY(sc);
        free(sc, M_GIF);
 }
 
@@ -231,10 +185,14 @@ static void
 vnet_gif_init(const void *unused __unused)
 {
 
-       LIST_INIT(&V_gif_softc_list);
-       GIF_LIST_LOCK_INIT();
        V_gif_cloner = if_clone_simple(gifname, gif_clone_create,
            gif_clone_destroy, 0);
+#ifdef INET
+       in_gif_init();
+#endif
+#ifdef INET6
+       in6_gif_init();
+#endif
 }
 VNET_SYSINIT(vnet_gif_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
     vnet_gif_init, NULL);
@@ -244,7 +202,12 @@ vnet_gif_uninit(const void *unused __unused)
 {
 
        if_clone_detach(V_gif_cloner);
-       GIF_LIST_LOCK_DESTROY();
+#ifdef INET
+       in_gif_uninit();
+#endif
+#ifdef INET6
+       in6_gif_uninit();
+#endif
 }
 VNET_SYSUNINIT(vnet_gif_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
     vnet_gif_uninit, NULL);
@@ -272,65 +235,25 @@ static moduledata_t gif_mod = {
 DECLARE_MODULE(if_gif, gif_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
 MODULE_VERSION(if_gif, 1);
 
-int
-gif_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
+struct gif_list *
+gif_hashinit(void)
 {
-       GIF_RLOCK_TRACKER;
-       const struct ip *ip;
-       struct gif_softc *sc;
-       int ret;
+       struct gif_list *hash;
+       int i;
 
-       sc = (struct gif_softc *)arg;
-       if (sc == NULL || (GIF2IFP(sc)->if_flags & IFF_UP) == 0)
-               return (0);
+       hash = malloc(sizeof(struct gif_list) * GIF_HASH_SIZE,
+           M_GIF, M_WAITOK);
+       for (i = 0; i < GIF_HASH_SIZE; i++)
+               CK_LIST_INIT(&hash[i]);
 
-       ret = 0;
-       GIF_RLOCK(sc);
+       return (hash);
+}
 
-       /* no physical address */
-       if (sc->gif_family == 0)
-               goto done;
+void
+gif_hashdestroy(struct gif_list *hash)
+{
 
-       switch (proto) {
-#ifdef INET
-       case IPPROTO_IPV4:
-#endif
-#ifdef INET6
-       case IPPROTO_IPV6:
-#endif
-       case IPPROTO_ETHERIP:
-               break;
-       default:
-               goto done;
-       }
-
-       /* Bail on short packets */
-       M_ASSERTPKTHDR(m);
-       if (m->m_pkthdr.len < sizeof(struct ip))
-               goto done;
-
-       ip = mtod(m, const struct ip *);
-       switch (ip->ip_v) {
-#ifdef INET
-       case 4:
-               if (sc->gif_family != AF_INET)
-                       goto done;
-               ret = in_gif_encapcheck(m, off, proto, arg);
-               break;
-#endif
-#ifdef INET6
-       case 6:
-               if (m->m_pkthdr.len < sizeof(struct ip6_hdr))
-                       goto done;
-               if (sc->gif_family != AF_INET6)
-                       goto done;
-               ret = in6_gif_encapcheck(m, off, proto, arg);
-               break;
-#endif
-       }
-done:
-       GIF_RUNLOCK(sc);
-       return (ret);
+       free(hash, M_GIF);
 }
 
 static int
@@ -357,6 +280,7 @@ gif_transmit(struct ifnet *ifp, struct mbuf *m)
        }
 #endif
        error = ENETDOWN;
+       GIF_RLOCK();
        sc = ifp->if_softc;
        if ((ifp->if_flags & IFF_MONITOR) != 0 ||
            (ifp->if_flags & IFF_UP) == 0 ||
@@ -444,6 +368,7 @@ gif_transmit(struct ifnet *ifp, struct mbuf *m)
 err:
        if (error)
                if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+       GIF_RUNLOCK();
        return (error);
 }
 
@@ -616,7 +541,8 @@ gif_input(struct mbuf *m, struct ifnet *ifp, int proto
                break;
 #endif
        case AF_LINK:
-               n = sizeof(struct etherip_header) + sizeof(struct ether_header);
+               n = sizeof(struct etherip_header) +
+                   sizeof(struct ether_header);
                if (n > m->m_len)
                        m = m_pullup(m, n);
                if (m == NULL)
@@ -674,20 +600,11 @@ drop:
        if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
 }
 
-/* XXX how should we handle IPv6 scope on SIOC[GS]IFPHYADDR? */
-int
+static int
 gif_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 {
-       GIF_RLOCK_TRACKER;
        struct ifreq *ifr = (struct ifreq*)data;
-       struct sockaddr *dst, *src;
        struct gif_softc *sc;
-#ifdef INET
-       struct sockaddr_in *sin = NULL;
-#endif
-#ifdef INET6
-       struct sockaddr_in6 *sin6 = NULL;
-#endif
        u_int options;
        int error;
 
@@ -715,176 +632,25 @@ gif_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
        }
        error = 0;
        switch (cmd) {
-       case SIOCSIFPHYADDR:
-#ifdef INET6
-       case SIOCSIFPHYADDR_IN6:
-#endif
-               error = EINVAL;
-               switch (cmd) {
-#ifdef INET
-               case SIOCSIFPHYADDR:
-                       src = (struct sockaddr *)
-                               &(((struct in_aliasreq *)data)->ifra_addr);
-                       dst = (struct sockaddr *)
-                               &(((struct in_aliasreq *)data)->ifra_dstaddr);
+       case SIOCDIFPHYADDR:
+               if (sc->gif_family == 0)
                        break;
-#endif
-#ifdef INET6
-               case SIOCSIFPHYADDR_IN6:
-                       src = (struct sockaddr *)
-                               &(((struct in6_aliasreq *)data)->ifra_addr);
-                       dst = (struct sockaddr *)
-                               &(((struct in6_aliasreq *)data)->ifra_dstaddr);
-                       break;
-#endif
-               default:
-                       goto bad;
-               }
-               /* sa_family must be equal */
-               if (src->sa_family != dst->sa_family ||
-                   src->sa_len != dst->sa_len)
-                       goto bad;
-
-               /* validate sa_len */
-               /* check sa_family looks sane for the cmd */
-               switch (src->sa_family) {
-#ifdef INET
-               case AF_INET:
-                       if (src->sa_len != sizeof(struct sockaddr_in))
-                               goto bad;
-                       if (cmd != SIOCSIFPHYADDR) {
-                               error = EAFNOSUPPORT;
-                               goto bad;
-                       }
-                       if (satosin(src)->sin_addr.s_addr == INADDR_ANY ||
-                           satosin(dst)->sin_addr.s_addr == INADDR_ANY) {
-                               error = EADDRNOTAVAIL;
-                               goto bad;
-                       }
-                       break;
-#endif
-#ifdef INET6
-               case AF_INET6:
-                       if (src->sa_len != sizeof(struct sockaddr_in6))
-                               goto bad;
-                       if (cmd != SIOCSIFPHYADDR_IN6) {
-                               error = EAFNOSUPPORT;
-                               goto bad;
-                       }
-                       error = EADDRNOTAVAIL;
-                       if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(src)->sin6_addr)
-                           ||
-                           IN6_IS_ADDR_UNSPECIFIED(&satosin6(dst)->sin6_addr))
-                               goto bad;
-                       /*
-                        * Check validity of the scope zone ID of the
-                        * addresses, and convert it into the kernel
-                        * internal form if necessary.
-                        */
-                       error = sa6_embedscope(satosin6(src), 0);
-                       if (error != 0)
-                               goto bad;
-                       error = sa6_embedscope(satosin6(dst), 0);
-                       if (error != 0)
-                               goto bad;
-                       break;
-#endif
-               default:
-                       error = EAFNOSUPPORT;
-                       goto bad;
-               }
-               error = gif_set_tunnel(ifp, src, dst);
+               gif_delete_tunnel(sc);
                break;
-       case SIOCDIFPHYADDR:
-               gif_delete_tunnel(ifp);
-               break;
+#ifdef INET
+       case SIOCSIFPHYADDR:
        case SIOCGIFPSRCADDR:
        case SIOCGIFPDSTADDR:
+               error = in_gif_ioctl(sc, cmd, data);
+               break;
+#endif
 #ifdef INET6
+       case SIOCSIFPHYADDR_IN6:
        case SIOCGIFPSRCADDR_IN6:
        case SIOCGIFPDSTADDR_IN6:
-#endif
-               if (sc->gif_family == 0) {
-                       error = EADDRNOTAVAIL;
-                       break;
-               }
-               GIF_RLOCK(sc);
-               switch (cmd) {
-#ifdef INET
-               case SIOCGIFPSRCADDR:
-               case SIOCGIFPDSTADDR:
-                       if (sc->gif_family != AF_INET) {
-                               error = EADDRNOTAVAIL;
-                               break;
-                       }
-                       sin = (struct sockaddr_in *)&ifr->ifr_addr;
-                       memset(sin, 0, sizeof(*sin));
-                       sin->sin_family = AF_INET;
-                       sin->sin_len = sizeof(*sin);
-                       break;
-#endif
-#ifdef INET6
-               case SIOCGIFPSRCADDR_IN6:
-               case SIOCGIFPDSTADDR_IN6:
-                       if (sc->gif_family != AF_INET6) {
-                               error = EADDRNOTAVAIL;
-                               break;
-                       }
-                       sin6 = (struct sockaddr_in6 *)
-                               &(((struct in6_ifreq *)data)->ifr_addr);
-                       memset(sin6, 0, sizeof(*sin6));
-                       sin6->sin6_family = AF_INET6;
-                       sin6->sin6_len = sizeof(*sin6);
-                       break;
-#endif
-               default:
-                       error = EAFNOSUPPORT;
-               }
-               if (error == 0) {
-                       switch (cmd) {
-#ifdef INET
-                       case SIOCGIFPSRCADDR:
-                               sin->sin_addr = sc->gif_iphdr->ip_src;
-                               break;
-                       case SIOCGIFPDSTADDR:
-                               sin->sin_addr = sc->gif_iphdr->ip_dst;
-                               break;
-#endif
-#ifdef INET6
-                       case SIOCGIFPSRCADDR_IN6:
-                               sin6->sin6_addr = sc->gif_ip6hdr->ip6_src;
-                               break;
-                       case SIOCGIFPDSTADDR_IN6:
-                               sin6->sin6_addr = sc->gif_ip6hdr->ip6_dst;
-                               break;
-#endif
-                       }
-               }
-               GIF_RUNLOCK(sc);
-               if (error != 0)
-                       break;
-               switch (cmd) {
-#ifdef INET
-               case SIOCGIFPSRCADDR:
-               case SIOCGIFPDSTADDR:
-                       error = prison_if(curthread->td_ucred,
-                           (struct sockaddr *)sin);
-                       if (error != 0)
-                               memset(sin, 0, sizeof(*sin));
-                       break;
-#endif
-#ifdef INET6
-               case SIOCGIFPSRCADDR_IN6:
-               case SIOCGIFPDSTADDR_IN6:
-                       error = prison_if(curthread->td_ucred,
-                           (struct sockaddr *)sin6);
-                       if (error == 0)
-                               error = sa6_recoverscope(sin6);
-                       if (error != 0)
-                               memset(sin6, 0, sizeof(*sin6));
-#endif
-               }
+               error = in6_gif_ioctl(sc, cmd, data);
                break;
+#endif
        case SIOCGTUNFIB:
                ifr->ifr_fib = sc->gif_fibnum;
                break;
@@ -908,171 +674,63 @@ gif_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
                    sizeof(options));
                if (error)
                        break;
-               if (options & ~GIF_OPTMASK)
+               if (options & ~GIF_OPTMASK) {
                        error = EINVAL;
-               else
-                       sc->gif_options = options;
-               break;
-       default:
-               error = EINVAL;
-               break;
-       }
-bad:
-       sx_xunlock(&gif_ioctl_sx);
-       return (error);
-}
-
-static void
-gif_detach(struct gif_softc *sc, int family)
-{
-
-       sx_assert(&gif_ioctl_sx, SA_XLOCKED);
-       if (sc->gif_ecookie != NULL) {
-               switch (family) {
-#ifdef INET
-               case AF_INET:
-                       ip_encap_detach(sc->gif_ecookie);
                        break;
-#endif
-#ifdef INET6
-               case AF_INET6:
-                       ip6_encap_detach(sc->gif_ecookie);
-                       break;
-#endif
                }
-       }
-       sc->gif_ecookie = NULL;
-}
-
-static int
-gif_attach(struct gif_softc *sc, int af)
-{
-
-       sx_assert(&gif_ioctl_sx, SA_XLOCKED);
-       switch (af) {
+               if (sc->gif_options != options) {
+                       switch (sc->gif_family) {
 #ifdef INET
-       case AF_INET:
-               return (in_gif_attach(sc));
+                       case AF_INET:
+                               error = in_gif_setopts(sc, options);
+                               break;
 #endif
 #ifdef INET6
-       case AF_INET6:
-               return (in6_gif_attach(sc));
+                       case AF_INET6:
+                               error = in6_gif_setopts(sc, options);
+                               break;
 #endif
-       }
-       return (EAFNOSUPPORT);
-}
-
-static int
-gif_set_tunnel(struct ifnet *ifp, struct sockaddr *src, struct sockaddr *dst)
-{
-       struct gif_softc *sc = ifp->if_softc;
-       struct gif_softc *tsc;
-#ifdef INET
-       struct ip *ip;
-#endif
-#ifdef INET6
-       struct ip6_hdr *ip6;
-#endif
-       void *hdr;
-       int error = 0;
-
-       if (sc == NULL)
-               return (ENXIO);
-       /* Disallow parallel tunnels unless instructed otherwise. */
-       if (V_parallel_tunnels == 0) {
-               GIF_LIST_LOCK();
-               LIST_FOREACH(tsc, &V_gif_softc_list, gif_list) {
-                       if (tsc == sc || tsc->gif_family != src->sa_family)
-                               continue;
-#ifdef INET
-                       if (tsc->gif_family == AF_INET &&
-                           tsc->gif_iphdr->ip_src.s_addr ==
-                           satosin(src)->sin_addr.s_addr &&
-                           tsc->gif_iphdr->ip_dst.s_addr ==
-                           satosin(dst)->sin_addr.s_addr) {
-                               error = EADDRNOTAVAIL;
-                               GIF_LIST_UNLOCK();
-                               goto bad;
+                       default:
+                               /* No need to invoke AF-handler */
+                               sc->gif_options = options;
                        }
-#endif
-#ifdef INET6
-                       if (tsc->gif_family == AF_INET6 &&
-                           IN6_ARE_ADDR_EQUAL(&tsc->gif_ip6hdr->ip6_src,
-                           &satosin6(src)->sin6_addr) &&
-                           IN6_ARE_ADDR_EQUAL(&tsc->gif_ip6hdr->ip6_dst,
-                           &satosin6(dst)->sin6_addr)) {
-                               error = EADDRNOTAVAIL;
-                               GIF_LIST_UNLOCK();
-                               goto bad;
-                       }
-#endif
                }
-               GIF_LIST_UNLOCK();
+               break;
+       default:
+               error = EINVAL;
+               break;
        }
-       switch (src->sa_family) {
+       if (error == 0 && sc->gif_family != 0) {
+               if (
 #ifdef INET
-       case AF_INET:
-               hdr = ip = malloc(sizeof(struct ip), M_GIF,
-                   M_WAITOK | M_ZERO);
-               ip->ip_src.s_addr = satosin(src)->sin_addr.s_addr;
-               ip->ip_dst.s_addr = satosin(dst)->sin_addr.s_addr;
-               break;
+                   cmd == SIOCSIFPHYADDR ||
 #endif
 #ifdef INET6
-       case AF_INET6:
-               hdr = ip6 = malloc(sizeof(struct ip6_hdr), M_GIF,
-                   M_WAITOK | M_ZERO);
-               ip6->ip6_src = satosin6(src)->sin6_addr;
-               ip6->ip6_dst = satosin6(dst)->sin6_addr;
-               ip6->ip6_vfc = IPV6_VERSION;
-               break;
+                   cmd == SIOCSIFPHYADDR_IN6 ||
 #endif
-       default:
-               return (EAFNOSUPPORT);
+                   0) {
+                       ifp->if_drv_flags |= IFF_DRV_RUNNING;
+                       if_link_state_change(ifp, LINK_STATE_UP);
+               }
        }
-
-       if (sc->gif_family != src->sa_family)
-               gif_detach(sc, sc->gif_family);
-       if (sc->gif_family == 0 ||
-           sc->gif_family != src->sa_family)
-               error = gif_attach(sc, src->sa_family);
-
-       GIF_WLOCK(sc);
-       if (sc->gif_family != 0)
-               free(sc->gif_hdr, M_GIF);
-       sc->gif_family = src->sa_family;
-       sc->gif_hdr = hdr;
-       GIF_WUNLOCK(sc);
-#if defined(INET) || defined(INET6)
 bad:
-#endif
-       if (error == 0 && sc->gif_family != 0) {
-               ifp->if_drv_flags |= IFF_DRV_RUNNING;
-               if_link_state_change(ifp, LINK_STATE_UP);
-       } else {
-               ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
-               if_link_state_change(ifp, LINK_STATE_DOWN);
-       }
+       sx_xunlock(&gif_ioctl_sx);
        return (error);
 }
 
 static void
-gif_delete_tunnel(struct ifnet *ifp)
+gif_delete_tunnel(struct gif_softc *sc)
 {
-       struct gif_softc *sc = ifp->if_softc;
-       int family;
 
-       if (sc == NULL)
-               return;
-
-       GIF_WLOCK(sc);
-       family = sc->gif_family;
-       sc->gif_family = 0;
-       GIF_WUNLOCK(sc);
-       if (family != 0) {
-               gif_detach(sc, family);
+       sx_assert(&gif_ioctl_sx, SA_XLOCKED);
+       if (sc->gif_family != 0) {
+               CK_LIST_REMOVE(sc, chain);
+               /* Wait until it becomes safe to free gif_hdr */
+               GIF_WAIT();
                free(sc->gif_hdr, M_GIF);
        }
-       ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
-       if_link_state_change(ifp, LINK_STATE_DOWN);
+       sc->gif_family = 0;
+       GIF2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
+       if_link_state_change(GIF2IFP(sc), LINK_STATE_DOWN);
 }
+

Modified: head/sys/net/if_gif.h
==============================================================================
--- head/sys/net/if_gif.h       Tue Jun  5 20:54:29 2018        (r334672)
+++ head/sys/net/if_gif.h       Tue Jun  5 21:24:59 2018        (r334673)
@@ -5,6 +5,7 @@
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
+ * Copyright (c) 2018 Andrey V. Elsukov <a...@freebsd.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -36,14 +37,9 @@
 #define _NET_IF_GIF_H_
 
 #ifdef _KERNEL
-#include "opt_inet.h"
-#include "opt_inet6.h"
 
-#include <netinet/in.h>
-
 struct ip;
 struct ip6_hdr;
-struct encaptab;
 
 extern void (*ng_gif_input_p)(struct ifnet *ifp, struct mbuf **mp,
                int af);
@@ -55,8 +51,6 @@ extern        void (*ng_gif_detach_p)(struct ifnet *ifp);
 
 struct gif_softc {
        struct ifnet            *gif_ifp;
-       struct rmlock           gif_lock;
-       const struct encaptab   *gif_ecookie;
        int                     gif_family;
        int                     gif_flags;
        u_int                   gif_fibnum;
@@ -65,28 +59,22 @@ struct gif_softc {
        union {
                void            *hdr;
                struct ip       *iphdr;
-#ifdef INET6
                struct ip6_hdr  *ip6hdr;
-#endif
        } gif_uhdr;
-       LIST_ENTRY(gif_softc)   gif_list; /* all gif's are linked */
+
+       CK_LIST_ENTRY(gif_softc) chain;
 };
-#define        GIF2IFP(sc)     ((sc)->gif_ifp)
-#define        GIF_LOCK_INIT(sc)       rm_init(&(sc)->gif_lock, "gif softc")
-#define        GIF_LOCK_DESTROY(sc)    rm_destroy(&(sc)->gif_lock)
-#define        GIF_RLOCK_TRACKER       struct rm_priotracker gif_tracker
-#define        GIF_RLOCK(sc)           rm_rlock(&(sc)->gif_lock, &gif_tracker)
-#define        GIF_RUNLOCK(sc)         rm_runlock(&(sc)->gif_lock, &gif_tracker)
-#define        GIF_RLOCK_ASSERT(sc)    rm_assert(&(sc)->gif_lock, RA_RLOCKED)
-#define        GIF_WLOCK(sc)           rm_wlock(&(sc)->gif_lock)
-#define        GIF_WUNLOCK(sc)         rm_wunlock(&(sc)->gif_lock)
-#define        GIF_WLOCK_ASSERT(sc)    rm_assert(&(sc)->gif_lock, RA_WLOCKED)
+CK_LIST_HEAD(gif_list, gif_softc);
+MALLOC_DECLARE(M_GIF);
 
+#ifndef GIF_HASH_SIZE
+#define        GIF_HASH_SIZE   (1 << 4)
+#endif
+
+#define        GIF2IFP(sc)     ((sc)->gif_ifp)
 #define        gif_iphdr       gif_uhdr.iphdr
 #define        gif_hdr         gif_uhdr.hdr
-#ifdef INET6
 #define        gif_ip6hdr      gif_uhdr.ip6hdr
-#endif
 
 #define GIF_MTU                (1280)  /* Default MTU */
 #define        GIF_MTU_MIN     (1280)  /* Minimum MTU */
@@ -108,21 +96,29 @@ struct etherip_header {
 /* mbuf adjust factor to force 32-bit alignment of IP header */
 #define        ETHERIP_ALIGN           2
 
+#define        GIF_RLOCK()     epoch_enter_preempt(net_epoch_preempt)
+#define        GIF_RUNLOCK()   epoch_exit_preempt(net_epoch_preempt)
+#define        GIF_WAIT()      epoch_wait_preempt(net_epoch_preempt)
+
 /* Prototypes */
+struct gif_list *gif_hashinit(void);
+void gif_hashdestroy(struct gif_list *);
+
 void gif_input(struct mbuf *, struct ifnet *, int, uint8_t);
 int gif_output(struct ifnet *, struct mbuf *, const struct sockaddr *,
               struct route *);
-int gif_encapcheck(const struct mbuf *, int, int, void *);
-#ifdef INET
+
+void in_gif_init(void);
+void in_gif_uninit(void);
 int in_gif_output(struct ifnet *, struct mbuf *, int, uint8_t);
-int in_gif_encapcheck(const struct mbuf *, int, int, void *);
-int in_gif_attach(struct gif_softc *);
-#endif
-#ifdef INET6
+int in_gif_ioctl(struct gif_softc *, u_long, caddr_t);
+int in_gif_setopts(struct gif_softc *, u_int);
+
+void in6_gif_init(void);
+void in6_gif_uninit(void);
 int in6_gif_output(struct ifnet *, struct mbuf *, int, uint8_t);
-int in6_gif_encapcheck(const struct mbuf *, int, int, void *);
-int in6_gif_attach(struct gif_softc *);
-#endif
+int in6_gif_ioctl(struct gif_softc *, u_long, caddr_t);
+int in6_gif_setopts(struct gif_softc *, u_int);
 #endif /* _KERNEL */
 
 #define GIFGOPTS       _IOWR('i', 150, struct ifreq)

Modified: head/sys/netinet/in_gif.c
==============================================================================
--- head/sys/netinet/in_gif.c   Tue Jun  5 20:54:29 2018        (r334672)
+++ head/sys/netinet/in_gif.c   Tue Jun  5 21:24:59 2018        (r334673)
@@ -2,6 +2,7 @@
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
+ * Copyright (c) 2018 Andrey V. Elsukov <a...@freebsd.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -38,9 +39,8 @@ __FBSDID("$FreeBSD$");
 #include "opt_inet6.h"
 
 #include <sys/param.h>
-#include <sys/lock.h>
-#include <sys/rmlock.h>
 #include <sys/systm.h>
+#include <sys/jail.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/mbuf.h>
@@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/sysctl.h>
 #include <sys/malloc.h>
 
+#include <net/ethernet.h>
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/route.h>
@@ -75,15 +76,155 @@ static VNET_DEFINE(int, ip_gif_ttl) = GIF_TTL;
 SYSCTL_INT(_net_inet_ip, IPCTL_GIF_TTL, gifttl, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(ip_gif_ttl), 0, "Default TTL value for encapsulated packets");
 
+/*
+ * We keep interfaces in a hash table using src+dst as key.
+ * Interfaces with GIF_IGNORE_SOURCE flag are linked into plain list.
+ */
+static VNET_DEFINE(struct gif_list *, ipv4_hashtbl) = NULL;
+static VNET_DEFINE(struct gif_list, ipv4_list) = CK_LIST_HEAD_INITIALIZER();
+#define        V_ipv4_hashtbl          VNET(ipv4_hashtbl)
+#define        V_ipv4_list             VNET(ipv4_list)
+
+#define        GIF_HASH(src, dst)      (V_ipv4_hashtbl[\
+    in_gif_hashval((src), (dst)) & (GIF_HASH_SIZE - 1)])
+#define        GIF_HASH_SC(sc)         GIF_HASH((sc)->gif_iphdr->ip_src.s_addr,\
+    (sc)->gif_iphdr->ip_dst.s_addr)
+static uint32_t
+in_gif_hashval(in_addr_t src, in_addr_t dst)
+{
+       uint32_t ret;
+
+       ret = fnv_32_buf(&src, sizeof(src), FNV1_32_INIT);
+       return (fnv_32_buf(&dst, sizeof(dst), ret));
+}
+
+static int
+in_gif_checkdup(const struct gif_softc *sc, in_addr_t src, in_addr_t dst)
+{
+       struct gif_softc *tmp;
+
+       if (sc->gif_family == AF_INET &&
+           sc->gif_iphdr->ip_src.s_addr == src &&
+           sc->gif_iphdr->ip_dst.s_addr == dst)
+               return (EEXIST);
+
+       CK_LIST_FOREACH(tmp, &GIF_HASH(src, dst), chain) {
+               if (tmp == sc)
+                       continue;
+               if (tmp->gif_iphdr->ip_src.s_addr == src &&
+                   tmp->gif_iphdr->ip_dst.s_addr == dst)
+                       return (EADDRNOTAVAIL);
+       }
+       return (0);
+}
+
+static void
+in_gif_attach(struct gif_softc *sc)
+{
+
+       if (sc->gif_options & GIF_IGNORE_SOURCE)
+               CK_LIST_INSERT_HEAD(&V_ipv4_list, sc, chain);
+       else
+               CK_LIST_INSERT_HEAD(&GIF_HASH_SC(sc), sc, chain);
+}
+
 int
+in_gif_setopts(struct gif_softc *sc, u_int options)
+{
+
+       /* NOTE: we are protected with gif_ioctl_sx lock */
+       MPASS(sc->gif_family == AF_INET);
+       MPASS(sc->gif_options != options);
+
+       if ((options & GIF_IGNORE_SOURCE) !=
+           (sc->gif_options & GIF_IGNORE_SOURCE)) {
+               CK_LIST_REMOVE(sc, chain);
+               sc->gif_options = options;
+               in_gif_attach(sc);
+       }
+       return (0);
+}
+
+int
+in_gif_ioctl(struct gif_softc *sc, u_long cmd, caddr_t data)
+{
+       struct ifreq *ifr = (struct ifreq *)data;
+       struct sockaddr_in *dst, *src;
+       struct ip *ip;
+       int error;
+
+       /* NOTE: we are protected with gif_ioctl_sx lock */
+       error = EINVAL;
+       switch (cmd) {
+       case SIOCSIFPHYADDR:
+               src = &((struct in_aliasreq *)data)->ifra_addr;
+               dst = &((struct in_aliasreq *)data)->ifra_dstaddr;
+
+               /* sanity checks */
+               if (src->sin_family != dst->sin_family ||
+                   src->sin_family != AF_INET ||
+                   src->sin_len != dst->sin_len ||
+                   src->sin_len != sizeof(*src))
+                       break;
+               if (src->sin_addr.s_addr == INADDR_ANY ||
+                   dst->sin_addr.s_addr == INADDR_ANY) {
+                       error = EADDRNOTAVAIL;
+                       break;
+               }
+               if (V_ipv4_hashtbl == NULL)
+                       V_ipv4_hashtbl = gif_hashinit();
+               error = in_gif_checkdup(sc, src->sin_addr.s_addr,
+                   dst->sin_addr.s_addr);
+               if (error == EADDRNOTAVAIL)
+                       break;
+               if (error == EEXIST) {
+                       /* Addresses are the same. Just return. */
+                       error = 0;
+                       break;
+               }
+               ip = malloc(sizeof(*ip), M_GIF, M_WAITOK | M_ZERO);
+               ip->ip_src.s_addr = src->sin_addr.s_addr;
+               ip->ip_dst.s_addr = dst->sin_addr.s_addr;
+               if (sc->gif_family != 0) {
+                       /* Detach existing tunnel first */
+                       CK_LIST_REMOVE(sc, chain);
+                       GIF_WAIT();
+                       free(sc->gif_hdr, M_GIF);
+                       /* XXX: should we notify about link state change? */
+               }
+               sc->gif_family = AF_INET;
+               sc->gif_iphdr = ip;
+               in_gif_attach(sc);
+               break;

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
_______________________________________________
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to