Author: ae
Date: Wed Apr 24 09:05:45 2019
New Revision: 346630
URL: https://svnweb.freebsd.org/changeset/base/346630

Log:
  Add GRE-in-UDP encapsulation support as defined in RFC8086.
  
  This GRE-in-UDP encapsulation allows the UDP source port field to be
  used as an entropy field for load-balancing of GRE traffic in transit
  networks. Also, most multiqueue network cards are able to distribute
  incoming UDP datagrams to different NIC queues, while very few are
  able to do this for GRE packets.
  
  When an administrator enables UDP encapsulation with the command
  `ifconfig gre0 udpencap`, the driver creates a kernel socket that binds
  to the tunnel source address and, after udp_set_kernel_tunneling(), starts
  receiving all UDP packets destined to port 4754. Each kernel socket
  maintains a list of tunnels with different destination addresses. Thus,
  when several tunnels use the same source address, they are all handled by
  a single socket.  The IP[V6]_BINDANY socket option is used to be able to
  bind the socket to the source address even if it is not yet available in
  the system. This may happen on system boot, when a gre(4) interface is
  created before the source address becomes available. The encapsulation and
  sending of packets is done directly from gre(4) into ip[6]_output() without
  using sockets.
  
  Reviewed by:  eugen
  MFC after:    1 month
  Relnotes:     yes
  Differential Revision:        https://reviews.freebsd.org/D19921

Modified:
  head/sbin/ifconfig/ifgre.c
  head/share/man/man4/gre.4
  head/sys/modules/if_gre/Makefile
  head/sys/net/if_gre.c
  head/sys/net/if_gre.h
  head/sys/netinet/ip_gre.c
  head/sys/netinet6/ip6_gre.c

Modified: head/sbin/ifconfig/ifgre.c
==============================================================================
--- head/sbin/ifconfig/ifgre.c  Wed Apr 24 06:41:52 2019        (r346629)
+++ head/sbin/ifconfig/ifgre.c  Wed Apr 24 09:05:45 2019        (r346630)
@@ -44,15 +44,16 @@ __FBSDID("$FreeBSD$");
 
 #include "ifconfig.h"
 
-#define        GREBITS "\020\01ENABLE_CSUM\02ENABLE_SEQ"
+#define        GREBITS "\020\01ENABLE_CSUM\02ENABLE_SEQ\03UDPENCAP"
 
 static void gre_status(int s);
 
 static void
 gre_status(int s)
 {
-       uint32_t opts = 0;
+       uint32_t opts, port;
 
+       opts = 0;
        ifr.ifr_data = (caddr_t)&opts;
        if (ioctl(s, GREGKEY, &ifr) == 0)
                if (opts != 0)
@@ -60,6 +61,11 @@ gre_status(int s)
        opts = 0;
        if (ioctl(s, GREGOPTS, &ifr) != 0 || opts == 0)
                return;
+
+       port = 0;
+       ifr.ifr_data = (caddr_t)&port;
+       if (ioctl(s, GREGPORT, &ifr) == 0 && port != 0)
+               printf("\tudpport: %u\n", port);
        printb("\toptions", opts, GREBITS);
        putchar('\n');
 }
@@ -77,6 +83,18 @@ setifgrekey(const char *val, int dummy __unused, int s
 }
 
 static void
+setifgreport(const char *val, int dummy __unused, int s,
+    const struct afswtch *afp)
+{
+       uint32_t udpport = strtol(val, NULL, 0);
+
+       strlcpy(ifr.ifr_name, name, sizeof (ifr.ifr_name));
+       ifr.ifr_data = (caddr_t)&udpport;
+       if (ioctl(s, GRESPORT, (caddr_t)&ifr) < 0)
+               warn("ioctl (set udpport)");
+}
+
+static void
 setifgreopts(const char *val, int d, int s, const struct afswtch *afp)
 {
        uint32_t opts;
@@ -101,10 +119,13 @@ setifgreopts(const char *val, int d, int s, const stru
 
 static struct cmd gre_cmds[] = {
        DEF_CMD_ARG("grekey",                   setifgrekey),
+       DEF_CMD_ARG("udpport",                  setifgreport),
        DEF_CMD("enable_csum", GRE_ENABLE_CSUM, setifgreopts),
        DEF_CMD("-enable_csum",-GRE_ENABLE_CSUM,setifgreopts),
        DEF_CMD("enable_seq", GRE_ENABLE_SEQ,   setifgreopts),
        DEF_CMD("-enable_seq",-GRE_ENABLE_SEQ,  setifgreopts),
+       DEF_CMD("udpencap", GRE_UDPENCAP,       setifgreopts),
+       DEF_CMD("-udpencap",-GRE_UDPENCAP,      setifgreopts),
 };
 static struct afswtch af_gre = {
        .af_name        = "af_gre",

Modified: head/share/man/man4/gre.4
==============================================================================
--- head/share/man/man4/gre.4   Wed Apr 24 06:41:52 2019        (r346629)
+++ head/share/man/man4/gre.4   Wed Apr 24 09:05:45 2019        (r346630)
@@ -29,7 +29,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd June 2, 2015
+.Dd April 24, 2019
 .Dt GRE 4
 .Os
 .Sh NAME
@@ -89,7 +89,45 @@ A value of 0 disables the key option.
 Enables checksum calculation for outgoing packets.
 .It Ar enable_seq
 Enables use of sequence number field in the GRE header for outgoing packets.
+.It Ar udpencap
+Enables GRE-in-UDP encapsulation (see the
+.Sx GRE-IN-UDP ENCAPSULATION
+Section below for details).
+.It Ar udpport
+Set the source UDP port for outgoing packets.
+A value of 0 disables the persistence of source UDP port for outgoing packets.
+See the
+.Sx GRE-IN-UDP ENCAPSULATION
+Section below for details.
 .El
+.Sh GRE-IN-UDP ENCAPSULATION
+The
+.Nm
+driver supports GRE in UDP encapsulation as defined in RFC 8086.
+A GRE in UDP tunnel offers the possibility of better performance for
+load-balancing GRE traffic in transit networks.
+Encapsulating GRE in UDP enables use of the UDP source port to provide
+entropy to ECMP hashing.
+.Pp
+The GRE in UDP tunnel uses the single value 4754 as the UDP destination port.
+The UDP source port contains a 14-bit entropy value that is generated
+by the encapsulator to identify a flow for the encapsulated packet.
+The
+.Ar udpport
+option can be used to disable this behaviour and use a single source UDP
+port value.
+The value of
+.Ar udpport
+should be within the ephemeral port range, i.e., 49152 to 65535 by default.
+.Pp
+Note that a GRE in UDP tunnel is unidirectional; the tunnel traffic is not
+expected to be returned back to the UDP source port values used to generate
+entropy.
+This may impact NAPT (Network Address Port Translator) middleboxes.
+If such tunnels are expected to be used on a path with a middlebox,
+the tunnel can be configured either to disable use of the UDP source port
+for entropy or to enable middleboxes to pass packets with UDP source port
+entropy.
 .Sh EXAMPLES
 .Bd -literal
 192.168.1.* --- Router A  -------tunnel-------- Router B --- 192.168.2.*

Modified: head/sys/modules/if_gre/Makefile
==============================================================================
--- head/sys/modules/if_gre/Makefile    Wed Apr 24 06:41:52 2019        (r346629)
+++ head/sys/modules/if_gre/Makefile    Wed Apr 24 09:05:45 2019        (r346630)
@@ -5,7 +5,7 @@ SYSDIR?=${SRCTOP}/sys
 .include "${SYSDIR}/conf/kern.opts.mk"
 
 KMOD=  if_gre
-SRCS=  if_gre.c opt_inet.h opt_inet6.h
+SRCS=  if_gre.c opt_inet.h opt_inet6.h opt_rss.h
 SRCS.INET=     ip_gre.c
 SRCS.INET6=    ip6_gre.c
 

Modified: head/sys/net/if_gre.c
==============================================================================
--- head/sys/net/if_gre.c       Wed Apr 24 06:41:52 2019        (r346629)
+++ head/sys/net/if_gre.c       Wed Apr 24 09:05:45 2019        (r346630)
@@ -39,6 +39,7 @@ __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
+#include "opt_rss.h"
 
 #include <sys/param.h>
 #include <sys/kernel.h>
@@ -49,6 +50,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/socket.h>
+#include <sys/socketvar.h>
 #include <sys/sockio.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
@@ -65,19 +67,27 @@ __FBSDID("$FreeBSD$");
 #include <net/route.h>
 
 #include <netinet/in.h>
+#include <netinet/in_pcb.h>
 #ifdef INET
 #include <netinet/in_var.h>
 #include <netinet/ip.h>
 #include <netinet/ip_var.h>
+#ifdef RSS
+#include <netinet/in_rss.h>
 #endif
+#endif
 
 #ifdef INET6
 #include <netinet/ip6.h>
 #include <netinet6/in6_var.h>
 #include <netinet6/ip6_var.h>
+#ifdef RSS
+#include <netinet6/in6_rss.h>
 #endif
+#endif
 
 #include <netinet/ip_encap.h>
+#include <netinet/udp.h>
 #include <net/bpf.h>
 #include <net/if_gre.h>
 
@@ -151,6 +161,7 @@ vnet_gre_uninit(const void *unused __unused)
 #ifdef INET6
        in6_gre_uninit();
 #endif
+       /* XXX: epoch_call drain */
 }
 VNET_SYSUNINIT(vnet_gre_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
     vnet_gre_uninit, NULL);
@@ -266,6 +277,7 @@ gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
                break;
        case GRESKEY:
        case GRESOPTS:
+       case GRESPORT:
                if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0)
                        break;
                if ((error = copyin(ifr_data_get_ptr(ifr), &opt,
@@ -281,23 +293,45 @@ gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
                        }
                        if (sc->gre_options == opt)
                                break;
+               } else if (cmd == GRESPORT) {
+                       if (opt != 0 && (opt < V_ipport_hifirstauto ||
+                           opt > V_ipport_hilastauto)) {
+                               error = EINVAL;
+                               break;
+                       }
+                       if (sc->gre_port == opt)
+                               break;
+                       if ((sc->gre_options & GRE_UDPENCAP) == 0) {
+                               /*
+                                * UDP encapsulation is not enabled, thus
+                                * there is no need to reattach softc.
+                                */
+                               sc->gre_port = opt;
+                               break;
+                       }
                }
                switch (sc->gre_family) {
 #ifdef INET
                case AF_INET:
-                       in_gre_setopts(sc, cmd, opt);
+                       error = in_gre_setopts(sc, cmd, opt);
                        break;
 #endif
 #ifdef INET6
                case AF_INET6:
-                       in6_gre_setopts(sc, cmd, opt);
+                       error = in6_gre_setopts(sc, cmd, opt);
                        break;
 #endif
                default:
+                       /*
+                        * Tunnel is not yet configured.
+                        * We can just change any parameters.
+                        */
                        if (cmd == GRESKEY)
                                sc->gre_key = opt;
-                       else
+                       if (cmd == GRESOPTS)
                                sc->gre_options = opt;
+                       if (cmd == GRESPORT)
+                               sc->gre_port = opt;
                        break;
                }
                /*
@@ -313,6 +347,10 @@ gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
                error = copyout(&sc->gre_options, ifr_data_get_ptr(ifr),
                    sizeof(sc->gre_options));
                break;
+       case GREGPORT:
+               error = copyout(&sc->gre_port, ifr_data_get_ptr(ifr),
+                   sizeof(sc->gre_port));
+               break;
        default:
                error = EINVAL;
                break;
@@ -337,6 +375,7 @@ end:
 static void
 gre_delete_tunnel(struct gre_softc *sc)
 {
+       struct gre_socket *gs;
 
        sx_assert(&gre_ioctl_sx, SA_XLOCKED);
        if (sc->gre_family != 0) {
@@ -346,6 +385,16 @@ gre_delete_tunnel(struct gre_softc *sc)
                free(sc->gre_hdr, M_GRE);
                sc->gre_family = 0;
        }
+       /*
+        * If this tunnel was the last one that could use the UDP socket,
+        * we should unlink the socket from the hash table and close it.
+        */
+       if ((gs = sc->gre_so) != NULL && CK_LIST_EMPTY(&gs->list)) {
+               CK_LIST_REMOVE(gs, chain);
+               soclose(gs->so);
+               epoch_call(net_epoch_preempt, &gs->epoch_ctx, gre_sofree);
+               sc->gre_so = NULL;
+       }
        GRE2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
        if_link_state_change(GRE2IFP(sc), LINK_STATE_DOWN);
 }
@@ -372,8 +421,39 @@ gre_hashdestroy(struct gre_list *hash)
 }
 
 void
-gre_updatehdr(struct gre_softc *sc, struct grehdr *gh)
+gre_sofree(epoch_context_t ctx)
 {
+       struct gre_socket *gs;
+
+       gs = __containerof(ctx, struct gre_socket, epoch_ctx);
+       free(gs, M_GRE);
+}
+
+static __inline uint16_t
+gre_cksum_add(uint16_t sum, uint16_t a)
+{
+       uint16_t res;
+
+       res = sum + a;
+       return (res + (res < a));
+}
+
+void
+gre_update_udphdr(struct gre_softc *sc, struct udphdr *udp, uint16_t csum)
+{
+
+       sx_assert(&gre_ioctl_sx, SA_XLOCKED);
+       MPASS(sc->gre_options & GRE_UDPENCAP);
+
+       udp->uh_dport = htons(GRE_UDPPORT);
+       udp->uh_sport = htons(sc->gre_port);
+       udp->uh_sum = csum;
+       udp->uh_ulen = 0;
+}
+
+void
+gre_update_hdr(struct gre_softc *sc, struct grehdr *gh)
+{
        uint32_t *opts;
        uint16_t flags;
 
@@ -539,6 +619,52 @@ gre_setseqn(struct grehdr *gh, uint32_t seq)
        *opts = htonl(seq);
 }
 
+static uint32_t
+gre_flowid(struct gre_softc *sc, struct mbuf *m, uint32_t af)
+{
+       uint32_t flowid;
+
+       if ((sc->gre_options & GRE_UDPENCAP) == 0 || sc->gre_port != 0)
+               return (0);
+#ifndef RSS
+       switch (af) {
+#ifdef INET
+       case AF_INET:
+               flowid = mtod(m, struct ip *)->ip_src.s_addr ^
+                   mtod(m, struct ip *)->ip_dst.s_addr;
+               break;
+#endif
+#ifdef INET6
+       case AF_INET6:
+               flowid = mtod(m, struct ip6_hdr *)->ip6_src.s6_addr32[3] ^
+                   mtod(m, struct ip6_hdr *)->ip6_dst.s6_addr32[3];
+               break;
+#endif
+       default:
+               flowid = 0;
+       }
+#else /* RSS */
+       switch (af) {
+#ifdef INET
+       case AF_INET:
+               flowid = rss_hash_ip4_2tuple(mtod(m, struct ip *)->ip_src,
+                   mtod(m, struct ip *)->ip_dst);
+               break;
+#endif
+#ifdef INET6
+       case AF_INET6:
+               flowid = rss_hash_ip6_2tuple(
+                   &mtod(m, struct ip6_hdr *)->ip6_src,
+                   &mtod(m, struct ip6_hdr *)->ip6_dst);
+               break;
+#endif
+       default:
+               flowid = 0;
+       }
+#endif
+       return (flowid);
+}
+
 #define        MTAG_GRE        1307983903
 static int
 gre_transmit(struct ifnet *ifp, struct mbuf *m)
@@ -546,7 +672,8 @@ gre_transmit(struct ifnet *ifp, struct mbuf *m)
        GRE_RLOCK_TRACKER;
        struct gre_softc *sc;
        struct grehdr *gh;
-       uint32_t af;
+       struct udphdr *uh;
+       uint32_t af, flowid;
        int error, len;
        uint16_t proto;
 
@@ -573,6 +700,7 @@ gre_transmit(struct ifnet *ifp, struct mbuf *m)
        af = m->m_pkthdr.csum_data;
        BPF_MTAP2(ifp, &af, sizeof(af), m);
        m->m_flags &= ~(M_BCAST|M_MCAST);
+       flowid = gre_flowid(sc, m, af);
        M_SETFIB(m, sc->gre_fibnum);
        M_PREPEND(m, sc->gre_hlen, M_NOWAIT);
        if (m == NULL) {
@@ -614,6 +742,19 @@ gre_transmit(struct ifnet *ifp, struct mbuf *m)
                error = ENETDOWN;
                goto drop;
        }
+       if (sc->gre_options & GRE_UDPENCAP) {
+               uh = (struct udphdr *)mtodo(m, len);
+               uh->uh_sport |= htons(V_ipport_hifirstauto) |
+                   (flowid >> 16) | (flowid & 0xFFFF);
+               uh->uh_sport = htons(ntohs(uh->uh_sport) %
+                   V_ipport_hilastauto);
+               uh->uh_ulen = htons(m->m_pkthdr.len - len);
+               uh->uh_sum = gre_cksum_add(uh->uh_sum,
+                   htons(m->m_pkthdr.len - len + IPPROTO_UDP));
+               m->m_pkthdr.csum_flags = sc->gre_csumflags;
+               m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
+               len += sizeof(struct udphdr);
+       }
        gh = (struct grehdr *)mtodo(m, len);
        gh->gre_proto = proto;
        if (sc->gre_options & GRE_ENABLE_SEQ)
@@ -631,7 +772,7 @@ gre_transmit(struct ifnet *ifp, struct mbuf *m)
 #endif
 #ifdef INET6
        case AF_INET6:
-               error = in6_gre_output(m, af, sc->gre_hlen);
+               error = in6_gre_output(m, af, sc->gre_hlen, flowid);
                break;
 #endif
        default:

Modified: head/sys/net/if_gre.h
==============================================================================
--- head/sys/net/if_gre.h       Wed Apr 24 06:41:52 2019        (r346629)
+++ head/sys/net/if_gre.h       Wed Apr 24 09:05:45 2019        (r346630)
@@ -53,15 +53,36 @@ struct greip {
        struct ip       gi_ip;
        struct grehdr   gi_gre;
 } __packed;
-#endif
 
+struct greudp {
+       struct ip       gi_ip;
+       struct udphdr   gi_udp;
+       struct grehdr   gi_gre;
+} __packed;
+#endif /* INET */
+
 #ifdef INET6
 struct greip6 {
        struct ip6_hdr  gi6_ip6;
        struct grehdr   gi6_gre;
 } __packed;
-#endif
 
+struct greudp6 {
+       struct ip6_hdr  gi6_ip6;
+       struct udphdr   gi6_udp;
+       struct grehdr   gi6_gre;
+} __packed;
+#endif /* INET6 */
+
+CK_LIST_HEAD(gre_list, gre_softc);
+CK_LIST_HEAD(gre_sockets, gre_socket);
+struct gre_socket {
+       struct socket           *so;
+       struct gre_list         list;
+       CK_LIST_ENTRY(gre_socket) chain;
+       struct epoch_context    epoch_ctx;
+};
+
 struct gre_softc {
        struct ifnet            *gre_ifp;
        int                     gre_family;     /* AF of delivery header */
@@ -69,22 +90,26 @@ struct gre_softc {
        uint32_t                gre_oseq;
        uint32_t                gre_key;
        uint32_t                gre_options;
+       uint32_t                gre_csumflags;
+       uint32_t                gre_port;
        u_int                   gre_fibnum;
        u_int                   gre_hlen;       /* header size */
        union {
                void            *hdr;
 #ifdef INET
-               struct greip    *gihdr;
+               struct greip    *iphdr;
+               struct greudp   *udphdr;
 #endif
 #ifdef INET6
-               struct greip6   *gi6hdr;
+               struct greip6   *ip6hdr;
+               struct greudp6  *udp6hdr;
 #endif
        } gre_uhdr;
+       struct gre_socket       *gre_so;
 
        CK_LIST_ENTRY(gre_softc) chain;
        CK_LIST_ENTRY(gre_softc) srchash;
 };
-CK_LIST_HEAD(gre_list, gre_softc);
 MALLOC_DECLARE(M_GRE);
 
 #ifndef GRE_HASH_SIZE
@@ -98,28 +123,35 @@ MALLOC_DECLARE(M_GRE);
 #define        GRE_WAIT()              epoch_wait_preempt(net_epoch_preempt)
 
 #define        gre_hdr                 gre_uhdr.hdr
-#define        gre_gihdr               gre_uhdr.gihdr
-#define        gre_gi6hdr              gre_uhdr.gi6hdr
-#define        gre_oip                 gre_gihdr->gi_ip
-#define        gre_oip6                gre_gi6hdr->gi6_ip6
+#define        gre_iphdr               gre_uhdr.iphdr
+#define        gre_ip6hdr              gre_uhdr.ip6hdr
+#define        gre_udphdr              gre_uhdr.udphdr
+#define        gre_udp6hdr             gre_uhdr.udp6hdr
 
+#define        gre_oip                 gre_iphdr->gi_ip
+#define        gre_udp                 gre_udphdr->gi_udp
+#define        gre_oip6                gre_ip6hdr->gi6_ip6
+#define        gre_udp6                gre_udp6hdr->gi6_udp
+
 struct gre_list *gre_hashinit(void);
 void gre_hashdestroy(struct gre_list *);
 
 int    gre_input(struct mbuf *, int, int, void *);
-void   gre_updatehdr(struct gre_softc *, struct grehdr *);
+void   gre_update_hdr(struct gre_softc *, struct grehdr *);
+void   gre_update_udphdr(struct gre_softc *, struct udphdr *, uint16_t);
+void   gre_sofree(epoch_context_t);
 
 void   in_gre_init(void);
 void   in_gre_uninit(void);
-void   in_gre_setopts(struct gre_softc *, u_long, uint32_t);
+int    in_gre_setopts(struct gre_softc *, u_long, uint32_t);
 int    in_gre_ioctl(struct gre_softc *, u_long, caddr_t);
 int    in_gre_output(struct mbuf *, int, int);
 
 void   in6_gre_init(void);
 void   in6_gre_uninit(void);
-void   in6_gre_setopts(struct gre_softc *, u_long, uint32_t);
+int    in6_gre_setopts(struct gre_softc *, u_long, uint32_t);
 int    in6_gre_ioctl(struct gre_softc *, u_long, caddr_t);
-int    in6_gre_output(struct mbuf *, int, int);
+int    in6_gre_output(struct mbuf *, int, int, uint32_t);
 /*
  * CISCO uses special type for GRE tunnel created as part of WCCP
  * connection, while in fact those packets are just IPv4 encapsulated
@@ -139,9 +171,15 @@ int        in6_gre_output(struct mbuf *, int, int);
 #define        GRESKEY         _IOW('i', 108, struct ifreq)
 #define        GREGOPTS        _IOWR('i', 109, struct ifreq)
 #define        GRESOPTS        _IOW('i', 110, struct ifreq)
+#define        GREGPORT        _IOWR('i', 111, struct ifreq)
+#define        GRESPORT        _IOW('i', 112, struct ifreq)
 
+/* GRE-in-UDP encapsulation destination port as defined in RFC8086 */
+#define        GRE_UDPPORT             4754
+
 #define        GRE_ENABLE_CSUM         0x0001
 #define        GRE_ENABLE_SEQ          0x0002
-#define        GRE_OPTMASK             (GRE_ENABLE_CSUM|GRE_ENABLE_SEQ)
+#define        GRE_UDPENCAP            0x0004
+#define        GRE_OPTMASK             (GRE_ENABLE_CSUM|GRE_ENABLE_SEQ|GRE_UDPENCAP)
 
 #endif /* _NET_IF_GRE_H_ */

Modified: head/sys/netinet/ip_gre.c
==============================================================================
--- head/sys/netinet/ip_gre.c   Wed Apr 24 06:41:52 2019        (r346629)
+++ head/sys/netinet/ip_gre.c   Wed Apr 24 09:05:45 2019        (r346630)
@@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/jail.h>
 #include <sys/systm.h>
 #include <sys/socket.h>
+#include <sys/socketvar.h>
 #include <sys/sockio.h>
 #include <sys/mbuf.h>
 #include <sys/errno.h>
@@ -58,15 +59,19 @@ __FBSDID("$FreeBSD$");
 
 #include <netinet/in.h>
 #include <netinet/in_var.h>
+#include <netinet/in_pcb.h>
 #include <netinet/ip.h>
 #include <netinet/ip_encap.h>
 #include <netinet/ip_var.h>
+#include <netinet/udp.h>
+#include <netinet/udp_var.h>
 
 #ifdef INET6
 #include <netinet/ip6.h>
 #endif
 
 #include <net/if_gre.h>
+#include <machine/in_cksum.h>
 
 #define        GRE_TTL                 30
 VNET_DEFINE(int, ip_gre_ttl) = GRE_TTL;
@@ -74,14 +79,22 @@ VNET_DEFINE(int, ip_gre_ttl) = GRE_TTL;
 SYSCTL_INT(_net_inet_ip, OID_AUTO, grettl, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(ip_gre_ttl), 0, "Default TTL value for encapsulated packets");
 
+struct in_gre_socket {
+       struct gre_socket               base;
+       in_addr_t                       addr;
+};
+VNET_DEFINE_STATIC(struct gre_sockets *, ipv4_sockets) = NULL;
 VNET_DEFINE_STATIC(struct gre_list *, ipv4_hashtbl) = NULL;
 VNET_DEFINE_STATIC(struct gre_list *, ipv4_srchashtbl) = NULL;
+#define        V_ipv4_sockets          VNET(ipv4_sockets)
 #define        V_ipv4_hashtbl          VNET(ipv4_hashtbl)
 #define        V_ipv4_srchashtbl       VNET(ipv4_srchashtbl)
 #define        GRE_HASH(src, dst)      (V_ipv4_hashtbl[\
     in_gre_hashval((src), (dst)) & (GRE_HASH_SIZE - 1)])
 #define        GRE_SRCHASH(src)        (V_ipv4_srchashtbl[\
     fnv_32_buf(&(src), sizeof(src), FNV1_32_INIT) & (GRE_HASH_SIZE - 1)])
+#define        GRE_SOCKHASH(src)       (V_ipv4_sockets[\
+    fnv_32_buf(&(src), sizeof(src), FNV1_32_INIT) & (GRE_HASH_SIZE - 1)])
 #define        GRE_HASH_SC(sc)         GRE_HASH((sc)->gre_oip.ip_src.s_addr,\
     (sc)->gre_oip.ip_dst.s_addr)
 
@@ -94,17 +107,43 @@ in_gre_hashval(in_addr_t src, in_addr_t dst)
        return (fnv_32_buf(&dst, sizeof(dst), ret));
 }
 
+static struct gre_socket*
+in_gre_lookup_socket(in_addr_t addr)
+{
+       struct gre_socket *gs;
+       struct in_gre_socket *s;
+
+       CK_LIST_FOREACH(gs, &GRE_SOCKHASH(addr), chain) {
+               s = __containerof(gs, struct in_gre_socket, base);
+               if (s->addr == addr)
+                       break;
+       }
+       return (gs);
+}
+
 static int
-in_gre_checkdup(const struct gre_softc *sc, in_addr_t src, in_addr_t dst)
+in_gre_checkdup(const struct gre_softc *sc, in_addr_t src, in_addr_t dst,
+    uint32_t opts)
 {
+       struct gre_list *head;
        struct gre_softc *tmp;
+       struct gre_socket *gs;
 
        if (sc->gre_family == AF_INET &&
            sc->gre_oip.ip_src.s_addr == src &&
-           sc->gre_oip.ip_dst.s_addr == dst)
+           sc->gre_oip.ip_dst.s_addr == dst &&
+           (sc->gre_options & GRE_UDPENCAP) == (opts & GRE_UDPENCAP))
                return (EEXIST);
 
-       CK_LIST_FOREACH(tmp, &GRE_HASH(src, dst), chain) {
+       if (opts & GRE_UDPENCAP) {
+               gs = in_gre_lookup_socket(src);
+               if (gs == NULL)
+                       return (0);
+               head = &gs->list;
+       } else
+               head = &GRE_HASH(src, dst);
+
+       CK_LIST_FOREACH(tmp, head, chain) {
                if (tmp == sc)
                        continue;
                if (tmp->gre_oip.ip_src.s_addr == src &&
@@ -181,35 +220,228 @@ in_gre_srcaddr(void *arg __unused, const struct sockad
 }
 
 static void
+in_gre_udp_input(struct mbuf *m, int off, struct inpcb *inp,
+    const struct sockaddr *sa, void *ctx)
+{
+       struct epoch_tracker et;
+       struct gre_socket *gs;
+       struct gre_softc *sc;
+       in_addr_t dst;
+
+       NET_EPOCH_ENTER(et);
+       /*
+        * udp_append() holds reference to inp, it is safe to check
+        * inp_flags2 without INP_RLOCK().
+        * If socket was closed before we have entered NET_EPOCH section,
+        * INP_FREED flag should be set. Otherwise it should be safe to
+        * make access to ctx data, because gre_so will be freed by
+        * gre_sofree() via epoch_call().
+        */
+       if (__predict_false(inp->inp_flags2 & INP_FREED)) {
+               NET_EPOCH_EXIT(et);
+               m_freem(m);
+               return;
+       }
+
+       gs = (struct gre_socket *)ctx;
+       dst = ((const struct sockaddr_in *)sa)->sin_addr.s_addr;
+       CK_LIST_FOREACH(sc, &gs->list, chain) {
+               if (sc->gre_oip.ip_dst.s_addr == dst)
+                       break;
+       }
+       if (sc != NULL && (GRE2IFP(sc)->if_flags & IFF_UP) != 0){
+               gre_input(m, off + sizeof(struct udphdr), IPPROTO_UDP, sc);
+               NET_EPOCH_EXIT(et);
+               return;
+       }
+       m_freem(m);
+       NET_EPOCH_EXIT(et);
+}
+
+static int
+in_gre_setup_socket(struct gre_softc *sc)
+{
+       struct sockopt sopt;
+       struct sockaddr_in sin;
+       struct in_gre_socket *s;
+       struct gre_socket *gs;
+       in_addr_t addr;
+       int error, value;
+
+       /*
+        * NOTE: we are protected with gre_ioctl_sx lock.
+        *
+        * First check that socket is already configured.
+        * If so, check that source address was not changed.
+        * If address is different, check that there are no other tunnels
+        * and close socket.
+        */
+       addr = sc->gre_oip.ip_src.s_addr;
+       gs = sc->gre_so;
+       if (gs != NULL) {
+               s = __containerof(gs, struct in_gre_socket, base);
+               if (s->addr != addr) {
+                       if (CK_LIST_EMPTY(&gs->list)) {
+                               CK_LIST_REMOVE(gs, chain);
+                               soclose(gs->so);
+                               epoch_call(net_epoch_preempt, &gs->epoch_ctx,
+                                   gre_sofree);
+                       }
+                       gs = sc->gre_so = NULL;
+               }
+       }
+
+       if (gs == NULL) {
+               /*
+                * Check that socket for given address is already
+                * configured.
+                */
+               gs = in_gre_lookup_socket(addr);
+               if (gs == NULL) {
+                       s = malloc(sizeof(*s), M_GRE, M_WAITOK | M_ZERO);
+                       s->addr = addr;
+                       gs = &s->base;
+
+                       error = socreate(sc->gre_family, &gs->so,
+                           SOCK_DGRAM, IPPROTO_UDP, curthread->td_ucred,
+                           curthread);
+                       if (error != 0) {
+                               if_printf(GRE2IFP(sc),
+                                   "cannot create socket: %d\n", error);
+                               free(s, M_GRE);
+                               return (error);
+                       }
+
+                       error = udp_set_kernel_tunneling(gs->so,
+                           in_gre_udp_input, NULL, gs);
+                       if (error != 0) {
+                               if_printf(GRE2IFP(sc),
+                                   "cannot set UDP tunneling: %d\n", error);
+                               goto fail;
+                       }
+
+                       memset(&sopt, 0, sizeof(sopt));
+                       sopt.sopt_dir = SOPT_SET;
+                       sopt.sopt_level = IPPROTO_IP;
+                       sopt.sopt_name = IP_BINDANY;
+                       sopt.sopt_val = &value;
+                       sopt.sopt_valsize = sizeof(value);
+                       value = 1;
+                       error = sosetopt(gs->so, &sopt);
+                       if (error != 0) {
+                               if_printf(GRE2IFP(sc),
+                                   "cannot set IP_BINDANY opt: %d\n", error);
+                               goto fail;
+                       }
+
+                       memset(&sin, 0, sizeof(sin));
+                       sin.sin_family = AF_INET;
+                       sin.sin_len = sizeof(sin);
+                       sin.sin_addr.s_addr = addr;
+                       sin.sin_port = htons(GRE_UDPPORT);
+                       error = sobind(gs->so, (struct sockaddr *)&sin,
+                           curthread);
+                       if (error != 0) {
+                               if_printf(GRE2IFP(sc),
+                                   "cannot bind socket: %d\n", error);
+                               goto fail;
+                       }
+                       /* Add socket to the chain */
+                       CK_LIST_INSERT_HEAD(&GRE_SOCKHASH(addr), gs, chain);
+               }
+       }
+
+       /* Add softc to the socket's list */
+       CK_LIST_INSERT_HEAD(&gs->list, sc, chain);
+       sc->gre_so = gs;
+       return (0);
+fail:
+       soclose(gs->so);
+       free(s, M_GRE);
+       return (error);
+}
+
+static int
 in_gre_attach(struct gre_softc *sc)
 {
+       struct grehdr *gh;
+       int error;
 
-       sc->gre_hlen = sizeof(struct greip);
+       if (sc->gre_options & GRE_UDPENCAP) {
+               sc->gre_csumflags = CSUM_UDP;
+               sc->gre_hlen = sizeof(struct greudp);
+               sc->gre_oip.ip_p = IPPROTO_UDP;
+               gh = &sc->gre_udphdr->gi_gre;
+               gre_update_udphdr(sc, &sc->gre_udp,
+                   in_pseudo(sc->gre_oip.ip_src.s_addr,
+                   sc->gre_oip.ip_dst.s_addr, 0));
+       } else {
+               sc->gre_hlen = sizeof(struct greip);
+               sc->gre_oip.ip_p = IPPROTO_GRE;
+               gh = &sc->gre_iphdr->gi_gre;
+       }
        sc->gre_oip.ip_v = IPVERSION;
        sc->gre_oip.ip_hl = sizeof(struct ip) >> 2;
-       sc->gre_oip.ip_p = IPPROTO_GRE;
-       gre_updatehdr(sc, &sc->gre_gihdr->gi_gre);
-       CK_LIST_INSERT_HEAD(&GRE_HASH_SC(sc), sc, chain);
+       gre_update_hdr(sc, gh);
+
+       /*
+        * If we return error, this means that sc is not linked,
+        * and caller should reset gre_family and free(sc->gre_hdr).
+        */
+       if (sc->gre_options & GRE_UDPENCAP) {
+               error = in_gre_setup_socket(sc);
+               if (error != 0)
+                       return (error);
+       } else
+               CK_LIST_INSERT_HEAD(&GRE_HASH_SC(sc), sc, chain);
        CK_LIST_INSERT_HEAD(&GRE_SRCHASH(sc->gre_oip.ip_src.s_addr),
            sc, srchash);
+
+       /* Set IFF_DRV_RUNNING if interface is ready */
+       in_gre_set_running(sc);
+       return (0);
 }
 
-void
+int
 in_gre_setopts(struct gre_softc *sc, u_long cmd, uint32_t value)
 {
+       int error;
 
-       MPASS(cmd == GRESKEY || cmd == GRESOPTS);
-
        /* NOTE: we are protected with gre_ioctl_sx lock */
+       MPASS(cmd == GRESKEY || cmd == GRESOPTS || cmd == GRESPORT);
        MPASS(sc->gre_family == AF_INET);
+
+       /*
+        * If we are going to change the encapsulation protocol, check
+        * for duplicate tunnels. Return EEXIST here so as not to confuse
+        * the user.
+        */
+       if (cmd == GRESOPTS &&
+           (sc->gre_options & GRE_UDPENCAP) != (value & GRE_UDPENCAP) &&
+           in_gre_checkdup(sc, sc->gre_oip.ip_src.s_addr,
+               sc->gre_oip.ip_dst.s_addr, value) == EADDRNOTAVAIL)
+               return (EEXIST);
+
        CK_LIST_REMOVE(sc, chain);
        CK_LIST_REMOVE(sc, srchash);
        GRE_WAIT();
-       if (cmd == GRESKEY)
+       switch (cmd) {
+       case GRESKEY:
                sc->gre_key = value;
-       else
+               break;
+       case GRESOPTS:
                sc->gre_options = value;
-       in_gre_attach(sc);
+               break;
+       case GRESPORT:
+               sc->gre_port = value;
+               break;
+       }
+       error = in_gre_attach(sc);
+       if (error != 0) {
+               sc->gre_family = 0;
+               free(sc->gre_hdr, M_GRE);
+       }
+       return (error);
 }
 
 int
@@ -241,9 +473,10 @@ in_gre_ioctl(struct gre_softc *sc, u_long cmd, caddr_t
                if (V_ipv4_hashtbl == NULL) {
                        V_ipv4_hashtbl = gre_hashinit();
                        V_ipv4_srchashtbl = gre_hashinit();
+                       V_ipv4_sockets = (struct gre_sockets *)gre_hashinit();
                }
                error = in_gre_checkdup(sc, src->sin_addr.s_addr,
-                   dst->sin_addr.s_addr);
+                   dst->sin_addr.s_addr, sc->gre_options);
                if (error == EADDRNOTAVAIL)
                        break;
                if (error == EEXIST) {
@@ -251,7 +484,7 @@ in_gre_ioctl(struct gre_softc *sc, u_long cmd, caddr_t
                        error = 0;
                        break;
                }
-               ip = malloc(sizeof(struct greip) + 3 * sizeof(uint32_t),
+               ip = malloc(sizeof(struct greudp) + 3 * sizeof(uint32_t),
                    M_GRE, M_WAITOK | M_ZERO);
                ip->ip_src.s_addr = src->sin_addr.s_addr;
                ip->ip_dst.s_addr = dst->sin_addr.s_addr;
@@ -267,8 +500,11 @@ in_gre_ioctl(struct gre_softc *sc, u_long cmd, caddr_t
                sc->gre_hdr = ip;
                sc->gre_oseq = 0;
                sc->gre_iseq = UINT32_MAX;
-               in_gre_attach(sc);
-               in_gre_set_running(sc);
+               error = in_gre_attach(sc);
+               if (error != 0) {
+                       sc->gre_family = 0;
+                       free(sc->gre_hdr, M_GRE);
+               }
                break;
        case SIOCGIFPSRCADDR:
        case SIOCGIFPDSTADDR:
@@ -354,5 +590,6 @@ in_gre_uninit(void)
                V_ipv4_hashtbl = NULL;
                GRE_WAIT();
                gre_hashdestroy(V_ipv4_srchashtbl);
+               gre_hashdestroy((struct gre_list *)V_ipv4_sockets);
        }
 }

Modified: head/sys/netinet6/ip6_gre.c
==============================================================================
--- head/sys/netinet6/ip6_gre.c Wed Apr 24 06:41:52 2019        (r346629)
+++ head/sys/netinet6/ip6_gre.c Wed Apr 24 09:05:45 2019        (r346630)
@@ -34,6 +34,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/jail.h>
 #include <sys/systm.h>
 #include <sys/socket.h>
+#include <sys/socketvar.h>
 #include <sys/sockio.h>
 #include <sys/mbuf.h>
 #include <sys/errno.h>
@@ -51,8 +52,12 @@ __FBSDID("$FreeBSD$");
 #include <net/ethernet.h>
 #include <netinet/ip.h>
 #endif
+#include <netinet/in_pcb.h>
 #include <netinet/ip_encap.h>
+#include <netinet/ip_var.h>
 #include <netinet/ip6.h>
+#include <netinet/udp.h>
+#include <netinet/udp_var.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/in6_var.h>
 #include <netinet6/scope6_var.h>
@@ -65,14 +70,22 @@ SYSCTL_DECL(_net_inet6_ip6);
 SYSCTL_INT(_net_inet6_ip6, OID_AUTO, grehlim, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(ip6_gre_hlim), 0, "Default hop limit for encapsulated packets");
 
+struct in6_gre_socket {
+       struct gre_socket       base; /* generic part, linked via chain */
+       struct in6_addr         addr; /* scope zone id is embedded */
+};
+VNET_DEFINE_STATIC(struct gre_sockets *, ipv6_sockets) = NULL;
 VNET_DEFINE_STATIC(struct gre_list *, ipv6_hashtbl) = NULL;
 VNET_DEFINE_STATIC(struct gre_list *, ipv6_srchashtbl) = NULL;
+#define        V_ipv6_sockets          VNET(ipv6_sockets)
 #define        V_ipv6_hashtbl          VNET(ipv6_hashtbl)
 #define        V_ipv6_srchashtbl       VNET(ipv6_srchashtbl)
 #define        GRE_HASH(src, dst)      (V_ipv6_hashtbl[\
     in6_gre_hashval((src), (dst)) & (GRE_HASH_SIZE - 1)])
 #define        GRE_SRCHASH(src)        (V_ipv6_srchashtbl[\
     fnv_32_buf((src), sizeof(*src), FNV1_32_INIT) & (GRE_HASH_SIZE - 1)])
+#define        GRE_SOCKHASH(src)       (V_ipv6_sockets[\
+    fnv_32_buf((src), sizeof(*src), FNV1_32_INIT) & (GRE_HASH_SIZE - 1)])
 #define        GRE_HASH_SC(sc)         GRE_HASH(&(sc)->gre_oip6.ip6_src,\
     &(sc)->gre_oip6.ip6_dst)
 
@@ -85,18 +98,43 @@ in6_gre_hashval(const struct in6_addr *src, const stru
        return (fnv_32_buf(dst, sizeof(*dst), ret));
 }
 
+static struct gre_socket*
+in6_gre_lookup_socket(const struct in6_addr *addr)
+{
+       struct gre_socket *gs;
+       struct in6_gre_socket *s;
+
+       CK_LIST_FOREACH(gs, &GRE_SOCKHASH(addr), chain) { /* bucket for addr */
+               s = __containerof(gs, struct in6_gre_socket, base);
+               if (IN6_ARE_ADDR_EQUAL(&s->addr, addr))
+                       break;
+       }
+       return (gs);    /* NULL when no socket in the bucket matched */
+}
+
 static int

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
_______________________________________________
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to