On Tue, Feb 15, 2022 at 04:49:10PM +1000, David Gwynne wrote:
> On Fri, Feb 11, 2022 at 03:13:25PM +1000, David Gwynne wrote:
> > On Fri, Mar 05, 2021 at 05:09:29PM +1000, David Gwynne wrote:
> > > On Thu, Mar 04, 2021 at 03:36:19PM +1000, David Gwynne wrote:
> > > > as the subject says, this is a rewrite of vxlan(4).
> > > > 
> > > > vxlan(4) relies on bridge(4) to implement learning, but i want to be
> > > > able to remove bridge(4) one day. while working on veb(4), i wrote
> > > > the guts of a learning bridge implementation that is now used by veb(4),
> > > > bpe(4), and nvgre(4). that learning bridge code is now also used by
> > > > vxlan(4).
> > > > 
> > > > this means that a few of the modes that the manpage talks about are
> > > > different now. because vxlan doesnt need a bridge for learning, there's
> > > > no "multicast mode" anymore, it just does "dynamic mode" out of the box
> > > > > when configured with a multicast destination address. there's no
> > > > > multipoint mode now either.
> > > > 
> > > > > another thing that's always bothered me about vxlan(4) is how it
> > > > > occupies the "udp namespace" and how it steals packets from the
> > > > > udp stack.
> > > > > the new code actually creates and binds udp sockets to handle the
> > > > vxlan packets. this means userland can't collide with a vxlan interface,
> > > > and you get to see that the port is in use in things like netstat. e.g.:
> > > > 
> > > > dlg@ikkaku ~$ ifconfig vxlan0
> > > > vxlan0: flags=8843<UP,BROADCAST,RUNNING,SIMPLEX,MULTICAST> mtu 1500
> > > >         lladdr fe:e1:ba:d1:17:2a
> > > >         index 11 llprio 3
> > > >         encap: vnetid none parent aggr0 txprio 0 rxprio outer
> > > >         groups: vxlan
> > > >         tunnel: inet 192.0.2.36 port 4789 --> 239.0.0.1 ttl 1 nodf
> > > >         Addresses (max cache: 100, timeout: 240):
> > > >         inet 100.64.1.36 netmask 0xffffff00 broadcast 100.64.1.255
> > > > dlg@ikkaku ~$ netstat -na -f inet -p udp
> > > > Active Internet connections (including servers)
> > > > Proto   Recv-Q Send-Q  Local Address          Foreign Address       
> > > > udp          0      0  130.102.96.36.29742    129.250.35.250.123    
> > > > udp          0      0  130.102.96.36.8965     162.159.200.123.123   
> > > > udp          0      0  130.102.96.36.13189    162.159.200.1.123     
> > > > udp          0      0  130.102.96.36.46580    220.158.215.20.123    
> > > > udp          0      0  130.102.96.36.23109    103.38.121.36.123     
> > > > udp          0      0  239.0.0.1.4789         *.*                   
> > > > udp          0      0  192.0.2.36.4789        *.*                   
> > > > 
> > > > > i've also added loop prevention, ie, sending an interface's vxlan
> > > > > packets over itself should fail rather than panic now.
> > > 
> > > here's an updated diff with a few fixes.
> > >
> > 
> > this diff better supports vxlan p2p and multicast vxlan configs that
> > share a UDP listener.
> 
> it doesn't look like anyone (else) cares about vxlan(4), so i'm
> going to commit this tomorrow unless anyone really objects.

I do not use vxlan(4) and I only glanced at the diff but I like what you
did. Especially removing the tentacles from udp by using proper socket
code.

You created a fair amount of static functions, did we change the rule of
no static in the kernel?

I'm OK with you committing this.
 
> > Index: net/if_vxlan.c
> > ===================================================================
> > RCS file: /cvs/src/sys/net/if_vxlan.c,v
> > retrieving revision 1.83
> > diff -u -p -r1.83 if_vxlan.c
> > --- net/if_vxlan.c  10 Jan 2022 14:07:59 -0000      1.83
> > +++ net/if_vxlan.c  11 Feb 2022 05:11:13 -0000
> > @@ -1,7 +1,7 @@
> > -/* $OpenBSD: if_vxlan.c,v 1.83 2022/01/10 14:07:59 jan Exp $       */
> > +/* $OpenBSD$ */
> >  
> >  /*
> > - * Copyright (c) 2013 Reyk Floeter <r...@openbsd.org>
> > + * Copyright (c) 2021 David Gwynne <d...@openbsd.org>
> >   *
> >   * Permission to use, copy, modify, and distribute this software for any
> >   * purpose with or without fee is hereby granted, provided that the above
> > @@ -17,475 +17,781 @@
> >   */
> >  
> >  #include "bpfilter.h"
> > -#include "vxlan.h"
> > -#include "vlan.h"
> >  #include "pf.h"
> > -#include "bridge.h"
> >  
> >  #include <sys/param.h>
> >  #include <sys/systm.h>
> > +#include <sys/kernel.h>
> >  #include <sys/mbuf.h>
> >  #include <sys/socket.h>
> > -#include <sys/sockio.h>
> >  #include <sys/ioctl.h>
> > +#include <sys/timeout.h>
> > +#include <sys/pool.h>
> > +#include <sys/tree.h>
> > +#include <sys/refcnt.h>
> > +#include <sys/smr.h>
> > +
> > +#include <sys/socket.h>
> > +#include <sys/socketvar.h>
> >  
> >  #include <net/if.h>
> >  #include <net/if_var.h>
> > +#include <net/if_dl.h>
> >  #include <net/if_media.h>
> > +#include <net/if_types.h>
> >  #include <net/route.h>
> > -
> > -#if NBPFILTER > 0
> > -#include <net/bpf.h>
> > -#endif
> > +#include <net/rtable.h>
> >  
> >  #include <netinet/in.h>
> >  #include <netinet/in_var.h>
> >  #include <netinet/if_ether.h>
> >  #include <netinet/ip.h>
> > -#include <netinet/ip_var.h>
> >  #include <netinet/udp.h>
> > -#include <netinet/udp_var.h>
> >  #include <netinet/in_pcb.h>
> > +#include <netinet/ip_var.h>
> >  
> > -#if NPF > 0
> > -#include <net/pfvar.h>
> > +#ifdef INET6
> > +#include <netinet/ip6.h>
> > +#include <netinet6/ip6_var.h>
> > +#include <netinet6/in6_var.h>
> >  #endif
> >  
> > -#if NBRIDGE > 0
> > +/* for bridge stuff */
> >  #include <net/if_bridge.h>
> > +#include <net/if_etherbridge.h>
> > +
> > +#if NBPFILTER > 0
> > +#include <net/bpf.h>
> >  #endif
> >  
> > -#include <net/if_vxlan.h>
> > +/*
> > + * The protocol.
> > + */
> > +
> > +#define VXLANMTU           1492
> > +#define VXLAN_PORT         4789
> > +
> > +struct vxlan_header {
> > +   uint32_t                vxlan_flags;
> > +#define VXLAN_F_I                  (1U << 27)
> > +   uint32_t                vxlan_id;
> > +#define VXLAN_VNI_SHIFT                    8
> > +#define    VXLAN_VNI_MASK                  (0xffffffU << VXLAN_VNI_SHIFT)
> > +};
> > +
> > +#define VXLAN_VNI_MAX                      0x00ffffffU
> > +#define VXLAN_VNI_MIN                      0x00000000U
> > +
> > +/*
> > + * The driver.
> > + */
> > +
> > +union vxlan_addr {
> > +   struct in_addr          in4;
> > +   struct in6_addr         in6;
> > +};
> > +
> > +struct vxlan_softc;
> > +
> > +struct vxlan_peer {
> > +   RBT_ENTRY(vxlan_peer)    p_entry;
> > +
> > +   struct vxlan_header      p_header;
> > +   union vxlan_addr         p_addr;
> > +
> > +   struct vxlan_softc      *p_sc;
> > +};
> > +
> > +RBT_HEAD(vxlan_peers, vxlan_peer);
> > +
> > +struct vxlan_tep {
> > +   TAILQ_ENTRY(vxlan_tep)   vt_entry;
> > +
> > +   sa_family_t              vt_af;
> > +   unsigned int             vt_rdomain;
> > +   union vxlan_addr         vt_addr;
> > +#define vt_addr4 vt_addr.in4
> > +#define vt_addr6 vt_addr.in6
> > +   in_port_t                vt_port;
> > +
> > +   struct socket           *vt_so;
> > +
> > +   struct mutex             vt_mtx;
> > +   struct vxlan_peers       vt_peers;
> > +};
> > +
> > +TAILQ_HEAD(vxlan_teps, vxlan_tep);
> > +
> > +enum vxlan_tunnel_mode {
> > +   VXLAN_TMODE_UNSET,
> > +   VXLAN_TMODE_P2P,         /* unicast destination, no learning */
> > +   VXLAN_TMODE_LEARNING,    /* multicast destination, learning */
> > +   VXLAN_TMODE_ENDPOINT,    /* unset destination, no learning */
> > +};
> >  
> >  struct vxlan_softc {
> >     struct arpcom            sc_ac;
> > -   struct ifmedia           sc_media;
> > +   struct etherbridge       sc_eb;
> > +
> > +   unsigned int             sc_rdomain;
> > +   sa_family_t              sc_af;
> > +   union vxlan_addr         sc_src;
> > +   union vxlan_addr         sc_dst;
> > +   in_port_t                sc_port;
> > +   struct vxlan_header      sc_header;
> > +   unsigned int             sc_if_index0;
> >  
> > -   struct ip_moptions       sc_imo;
> > -   struct task              sc_atask;
> > -   struct task              sc_ltask;
> >     struct task              sc_dtask;
> > +   void                    *sc_inmulti;
> > +
> > +   enum vxlan_tunnel_mode   sc_mode;
> > +   struct vxlan_peer       *sc_ucast_peer;
> > +   struct vxlan_peer       *sc_mcast_peer;
> > +   struct refcnt            sc_refs;
> >  
> > -   struct sockaddr_storage  sc_src;
> > -   struct sockaddr_storage  sc_dst;
> > -   in_port_t                sc_dstport;
> > -   u_int                    sc_rdomain;
> > -   int64_t                  sc_vnetid;
> >     uint16_t                 sc_df;
> > -   u_int8_t                 sc_ttl;
> > +   int                      sc_ttl;
> >     int                      sc_txhprio;
> > +   int                      sc_rxhprio;
> >  
> > -   struct task              sc_sendtask;
> > -
> > -   LIST_ENTRY(vxlan_softc)  sc_entry;
> > +   struct task              sc_send_task;
> >  };
> >  
> > -void        vxlanattach(int);
> > -int         vxlanioctl(struct ifnet *, u_long, caddr_t);
> > -void        vxlanstart(struct ifnet *);
> > -int         vxlan_clone_create(struct if_clone *, int);
> > -int         vxlan_clone_destroy(struct ifnet *);
> > -void        vxlan_multicast_cleanup(struct ifnet *);
> > -int         vxlan_multicast_join(struct ifnet *, struct sockaddr *,
> > -       struct sockaddr *);
> > -int         vxlan_media_change(struct ifnet *);
> > -void        vxlan_media_status(struct ifnet *, struct ifmediareq *);
> > -int         vxlan_config(struct ifnet *, struct sockaddr *, struct 
> > sockaddr *);
> > -int         vxlan_output(struct ifnet *, struct mbuf *);
> > -void        vxlan_addr_change(void *);
> > -void        vxlan_if_change(void *);
> > -void        vxlan_link_change(void *);
> > -void        vxlan_send_dispatch(void *);
> > +void               vxlanattach(int);
> > +
> > +static int vxlan_clone_create(struct if_clone *, int);
> > +static int vxlan_clone_destroy(struct ifnet *);
> > +
> > +static int vxlan_output(struct ifnet *, struct mbuf *,
> > +               struct sockaddr *, struct rtentry *);
> > +static int vxlan_enqueue(struct ifnet *, struct mbuf *);
> > +static void        vxlan_start(struct ifqueue *);
> > +static void        vxlan_send(void *);
> > +
> > +static int vxlan_ioctl(struct ifnet *, u_long, caddr_t);
> > +static int vxlan_up(struct vxlan_softc *);
> > +static int vxlan_down(struct vxlan_softc *);
> > +static int vxlan_addmulti(struct vxlan_softc *, struct ifnet *);
> > +static void        vxlan_delmulti(struct vxlan_softc *);
> > +
> > +static struct mbuf *
> > +           vxlan_input(void *, struct mbuf *,
> > +               struct ip *, struct ip6_hdr *, void *, int);
> > +
> > +static int vxlan_set_rdomain(struct vxlan_softc *, const struct ifreq *);
> > +static int vxlan_get_rdomain(struct vxlan_softc *, struct ifreq *);
> > +static int vxlan_set_tunnel(struct vxlan_softc *,
> > +               const struct if_laddrreq *);
> > +static int vxlan_get_tunnel(struct vxlan_softc *, struct if_laddrreq *);
> > +static int vxlan_del_tunnel(struct vxlan_softc *);
> > +static int vxlan_set_vnetid(struct vxlan_softc *, const struct ifreq *);
> > +static int vxlan_get_vnetid(struct vxlan_softc *, struct ifreq *);
> > +static int vxlan_del_vnetid(struct vxlan_softc *);
> > +static int vxlan_set_parent(struct vxlan_softc *,
> > +               const struct if_parent *);
> > +static int vxlan_get_parent(struct vxlan_softc *, struct if_parent *);
> > +static int vxlan_del_parent(struct vxlan_softc *);
> > +
> > +static int vxlan_add_addr(struct vxlan_softc *, const struct ifbareq *);
> > +static int vxlan_del_addr(struct vxlan_softc *, const struct ifbareq *);
> >  
> > -int         vxlan_sockaddr_cmp(struct sockaddr *, struct sockaddr *);
> > -uint16_t vxlan_sockaddr_port(struct sockaddr *);
> > +static void        vxlan_detach_hook(void *);
> >  
> > -struct if_clone    vxlan_cloner =
> > +static struct if_clone vxlan_cloner =
> >      IF_CLONE_INITIALIZER("vxlan", vxlan_clone_create, vxlan_clone_destroy);
> >  
> > -int         vxlan_enable = 0;
> > -u_long      vxlan_tagmask;
> > +static int  vxlan_eb_port_eq(void *, void *, void *);
> > +static void        *vxlan_eb_port_take(void *, void *);
> > +static void         vxlan_eb_port_rele(void *, void *);
> > +static size_t       vxlan_eb_port_ifname(void *, char *, size_t, void *);
> > +static void         vxlan_eb_port_sa(void *, struct sockaddr_storage *, 
> > void *);
> > +
> > +static const struct etherbridge_ops vxlan_etherbridge_ops = {
> > +   vxlan_eb_port_eq,
> > +   vxlan_eb_port_take,
> > +   vxlan_eb_port_rele,
> > +   vxlan_eb_port_ifname,
> > +   vxlan_eb_port_sa,
> > +};
> > +
> > +static struct rwlock vxlan_lock = RWLOCK_INITIALIZER("vteps");
> > +static struct vxlan_teps vxlan_teps = TAILQ_HEAD_INITIALIZER(vxlan_teps);
> > +static struct pool vxlan_endpoint_pool;
> >  
> > -#define VXLAN_TAGHASHSIZE           32
> > -#define VXLAN_TAGHASH(tag)          ((unsigned int)tag & vxlan_tagmask)
> > -LIST_HEAD(vxlan_taghash, vxlan_softc)      *vxlan_tagh, vxlan_any;
> > +static inline int  vxlan_peer_cmp(const struct vxlan_peer *,
> > +                       const struct vxlan_peer *);
> > +
> > +RBT_PROTOTYPE(vxlan_peers, vxlan_peer, p_entry, vxlan_peer_cmp);
> >  
> >  void
> >  vxlanattach(int count)
> >  {
> > -   /* Regular vxlan interfaces with a VNI */
> > -   if ((vxlan_tagh = hashinit(VXLAN_TAGHASHSIZE, M_DEVBUF, M_NOWAIT,
> > -       &vxlan_tagmask)) == NULL)
> > -           panic("vxlanattach: hashinit");
> > -
> > -   /* multipoint-to-multipoint interfaces that accept any VNI */
> > -   LIST_INIT(&vxlan_any);
> > -
> >     if_clone_attach(&vxlan_cloner);
> >  }
> >  
> > -int
> > +static int
> >  vxlan_clone_create(struct if_clone *ifc, int unit)
> >  {
> > -   struct ifnet            *ifp;
> > -   struct vxlan_softc      *sc;
> > +   struct vxlan_softc *sc;
> > +   struct ifnet *ifp;
> > +   int error;
> > +
> > +   if (vxlan_endpoint_pool.pr_size == 0) {
> > +           pool_init(&vxlan_endpoint_pool, sizeof(union vxlan_addr),
> > +               0, IPL_SOFTNET, 0, "vxlanep", NULL);
> > +   }
> >  
> > -   sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
> > -   sc->sc_imo.imo_membership = mallocarray(IP_MIN_MEMBERSHIPS,
> > -       sizeof(struct in_multi *), M_IPMOPTS, M_WAITOK|M_ZERO);
> > -   sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;
> > -   sc->sc_dstport = htons(VXLAN_PORT);
> > -   sc->sc_vnetid = VXLAN_VNI_UNSET;
> > -   sc->sc_txhprio = IFQ_TOS2PRIO(IPTOS_PREC_ROUTINE); /* 0 */
> > -   sc->sc_df = htons(0);
> > -   task_set(&sc->sc_atask, vxlan_addr_change, sc);
> > -   task_set(&sc->sc_ltask, vxlan_link_change, sc);
> > -   task_set(&sc->sc_dtask, vxlan_if_change, sc);
> > -   task_set(&sc->sc_sendtask, vxlan_send_dispatch, sc);
> > +   sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO|M_CANFAIL);
> > +   if (sc == NULL)
> > +           return (ENOMEM);
> >  
> >     ifp = &sc->sc_ac.ac_if;
> > -   snprintf(ifp->if_xname, sizeof ifp->if_xname, "vxlan%d", unit);
> > -   ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
> > -   ether_fakeaddr(ifp);
> >  
> > -   ifp->if_softc = sc;
> > -   ifp->if_ioctl = vxlanioctl;
> > -   ifp->if_start = vxlanstart;
> > +   snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d",
> > +       ifc->ifc_name, unit);
> >  
> > -   ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN;
> > -   ifp->if_capabilities = IFCAP_VLAN_MTU;
> > -   ifp->if_xflags = IFXF_CLONED;
> > +   error = etherbridge_init(&sc->sc_eb, ifp->if_xname,
> > +       &vxlan_etherbridge_ops, sc);
> > +   if (error == -1) {
> > +           free(sc, M_DEVBUF, sizeof(*sc));
> > +           return (error);
> > +   }
> > +
> > +   sc->sc_af = AF_UNSPEC;
> > +   sc->sc_txhprio = 0;
> > +   sc->sc_rxhprio = IF_HDRPRIO_OUTER;
> > +   sc->sc_df = 0;
> > +   sc->sc_ttl = IP_DEFAULT_MULTICAST_TTL;
> > +
> > +   task_set(&sc->sc_dtask, vxlan_detach_hook, sc);
> > +   refcnt_init(&sc->sc_refs);
> > +   task_set(&sc->sc_send_task, vxlan_send, sc);
> >  
> > -   ifmedia_init(&sc->sc_media, 0, vxlan_media_change,
> > -       vxlan_media_status);
> > -   ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
> > -   ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);
> > +   ifp->if_softc = sc;
> > +   ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN;
> > +   ifp->if_ioctl = vxlan_ioctl;
> > +   ifp->if_output = vxlan_output;
> > +   ifp->if_enqueue = vxlan_enqueue;
> > +   ifp->if_qstart = vxlan_start;
> > +   ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST | IFF_SIMPLEX;
> > +   ifp->if_xflags = IFXF_CLONED | IFXF_MPSAFE;
> > +   ether_fakeaddr(ifp);
> >  
> >     if_counters_alloc(ifp);
> >     if_attach(ifp);
> >     ether_ifattach(ifp);
> >  
> > -#if 0
> > -   /*
> > -    * Instead of using a decreased MTU of 1450 bytes, prefer
> > -    * to use the default Ethernet-size MTU of 1500 bytes and to
> > -    * increase the MTU of the outer transport interfaces to
> > -    * at least 1550 bytes. The following is disabled by default.
> > -    */
> > -   ifp->if_mtu = ETHERMTU - sizeof(struct ether_header);
> > -   ifp->if_mtu -= sizeof(struct vxlanudphdr) + sizeof(struct ipovly);
> > -#endif
> > -
> > -   LIST_INSERT_HEAD(&vxlan_tagh[VXLAN_TAGHASH(0)], sc, sc_entry);
> > -   vxlan_enable++;
> > -
> >     return (0);
> >  }
> >  
> > -int
> > +static int
> >  vxlan_clone_destroy(struct ifnet *ifp)
> >  {
> > -   struct vxlan_softc      *sc = ifp->if_softc;
> > +   struct vxlan_softc *sc = ifp->if_softc;
> >  
> >     NET_LOCK();
> > -   vxlan_multicast_cleanup(ifp);
> > +   if (ISSET(ifp->if_flags, IFF_RUNNING))
> > +           vxlan_down(sc);
> >     NET_UNLOCK();
> >  
> > -   vxlan_enable--;
> > -   LIST_REMOVE(sc, sc_entry);
> > -
> > -   ifmedia_delete_instance(&sc->sc_media, IFM_INST_ANY);
> >     ether_ifdetach(ifp);
> >     if_detach(ifp);
> >  
> > -   if (!task_del(net_tq(ifp->if_index), &sc->sc_sendtask))
> > -           taskq_barrier(net_tq(ifp->if_index));
> > +   etherbridge_destroy(&sc->sc_eb);
> > +
> > +   refcnt_finalize(&sc->sc_refs, "vxlanfini");
> >  
> > -   free(sc->sc_imo.imo_membership, M_IPMOPTS,
> > -       sc->sc_imo.imo_max_memberships * sizeof(struct in_multi *));
> >     free(sc, M_DEVBUF, sizeof(*sc));
> >  
> >     return (0);
> >  }
> >  
> > -void
> > -vxlan_multicast_cleanup(struct ifnet *ifp)
> > +static struct vxlan_softc *
> > +vxlan_take(struct vxlan_softc *sc)
> > +{
> > +   refcnt_take(&sc->sc_refs);
> > +   return (sc);
> > +}
> > +
> > +static void
> > +vxlan_rele(struct vxlan_softc *sc)
> >  {
> > -   struct vxlan_softc      *sc = (struct vxlan_softc *)ifp->if_softc;
> > -   struct ip_moptions      *imo = &sc->sc_imo;
> > -   struct ifnet            *mifp;
> > +   refcnt_rele_wake(&sc->sc_refs);
> > +}
> >  
> > -   mifp = if_get(imo->imo_ifidx);
> > -   if (mifp != NULL) {
> > -           if_addrhook_del(mifp, &sc->sc_atask);
> > -           if_linkstatehook_del(mifp, &sc->sc_ltask);
> > -           if_detachhook_del(mifp, &sc->sc_dtask);
> > +static struct mbuf *
> > +vxlan_encap(struct vxlan_softc *sc, struct mbuf *m,
> > +    struct mbuf *(ip_encap)(struct vxlan_softc *sc, struct mbuf *,
> > +    const union vxlan_addr *, uint8_t))
> > +{
> > +   struct mbuf *m0;
> > +   union vxlan_addr gateway;
> > +   const union vxlan_addr *endpoint;
> > +   struct vxlan_header *vh;
> > +   struct udphdr *uh;
> > +   int prio;
> > +   uint8_t tos;
> >  
> > -           if_put(mifp);
> > -   }
> > +   if (sc->sc_mode == VXLAN_TMODE_UNSET)
> > +           goto drop;
> >  
> > -   if (imo->imo_num_memberships > 0) {
> > -           in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
> > -           imo->imo_ifidx = 0;
> > +   if (sc->sc_mode == VXLAN_TMODE_P2P)
> > +           endpoint = &sc->sc_dst;
> > +   else { /* VXLAN_TMODE_LEARNING || VXLAN_TMODE_ENDPOINT */
> > +           struct ether_header *eh = mtod(m, struct ether_header *);
> > +
> > +           smr_read_enter();
> > +           endpoint = etherbridge_resolve_ea(&sc->sc_eb,
> > +               (struct ether_addr *)eh->ether_dhost);
> > +           if (endpoint != NULL) {
> > +                   gateway = *endpoint;
> > +                   endpoint = &gateway;
> > +           }
> > +           smr_read_leave();
> > +
> > +           if (endpoint == NULL) {
> > +                   if (sc->sc_mode == VXLAN_TMODE_ENDPOINT)
> > +                           goto drop;
> > +
> > +                   /* "flood" to unknown destinations */
> > +                   endpoint = &sc->sc_dst;
> > +           }
> >     }
> > +
> > +   /* force prepend mbuf because of payload alignment */
> > +   m0 = m_get(M_DONTWAIT, m->m_type);
> > +   if (m0 == NULL)
> > +           goto drop;
> > +
> > +   m_align(m0, 0);
> > +   m0->m_len = 0;
> > +
> > +   M_MOVE_PKTHDR(m0, m);
> > +   m0->m_next = m;
> > +
> > +   m = m_prepend(m0, sizeof(*vh), M_DONTWAIT);
> > +   if (m == NULL)
> > +           return (NULL);
> > +
> > +   vh = mtod(m, struct vxlan_header *);
> > +   *vh = sc->sc_header;
> > +
> > +   m = m_prepend(m, sizeof(*uh), M_DONTWAIT);
> > +   if (m == NULL)
> > +           return (NULL);
> > +
> > +   uh = mtod(m, struct udphdr *);
> > +   uh->uh_sport = sc->sc_port; /* XXX */
> > +   uh->uh_dport = sc->sc_port;
> > +   htobem16(&uh->uh_ulen, m->m_pkthdr.len);
> > +   uh->uh_sum = htons(0);
> > +
> > +   SET(m->m_pkthdr.csum_flags, M_UDP_CSUM_OUT);
> > +
> > +   prio = sc->sc_txhprio;
> > +   if (prio == IF_HDRPRIO_PACKET)
> > +           prio = m->m_pkthdr.pf.prio;
> > +   tos = IFQ_PRIO2TOS(prio);
> > +
> > +   CLR(m->m_flags, M_BCAST|M_MCAST);
> > +   m->m_pkthdr.ph_rtableid = sc->sc_rdomain;
> > +
> > +#if NPF > 0
> > +   pf_pkt_addr_changed(m);
> > +#endif
> > +
> > +   return ((*ip_encap)(sc, m, endpoint, tos));
> > +drop:
> > +   m_freem(m);
> > +   return (NULL);
> >  }
> >  
> > -int
> > -vxlan_multicast_join(struct ifnet *ifp, struct sockaddr *src,
> > -    struct sockaddr *dst)
> > +static struct mbuf *
> > +vxlan_encap_ipv4(struct vxlan_softc *sc, struct mbuf *m,
> > +    const union vxlan_addr *endpoint, uint8_t tos)
> >  {
> > -   struct vxlan_softc      *sc = ifp->if_softc;
> > -   struct ip_moptions      *imo = &sc->sc_imo;
> > -   struct sockaddr_in      *src4, *dst4;
> > -#ifdef INET6
> > -   struct sockaddr_in6     *dst6;
> > -#endif /* INET6 */
> > -   struct ifaddr           *ifa;
> > -   struct ifnet            *mifp;
> > +   struct ip *ip;
> > +
> > +   m = m_prepend(m, sizeof(*ip), M_DONTWAIT);
> > +   if (m == NULL)
> > +           return (NULL);
> > +
> > +   ip = mtod(m, struct ip *);
> > +   ip->ip_v = IPVERSION;
> > +   ip->ip_hl = sizeof(*ip) >> 2;
> > +   ip->ip_off = sc->sc_df;
> > +   ip->ip_tos = tos;
> > +   ip->ip_len = htons(m->m_pkthdr.len);
> > +   ip->ip_ttl = sc->sc_ttl;
> > +   ip->ip_p = IPPROTO_UDP;
> > +   ip->ip_src = sc->sc_src.in4;
> > +   ip->ip_dst = endpoint->in4;
> > +
> > +   return (m);
> > +}
> >  
> > -   switch (dst->sa_family) {
> > -   case AF_INET:
> > -           dst4 = satosin(dst);
> > -           if (!IN_MULTICAST(dst4->sin_addr.s_addr))
> > -                   return (0);
> > -           break;
> >  #ifdef INET6
> > -   case AF_INET6:
> > -           dst6 = satosin6(dst);
> > -           if (!IN6_IS_ADDR_MULTICAST(&dst6->sin6_addr))
> > -                   return (0);
> > +static struct mbuf *
> > +vxlan_encap_ipv6(struct vxlan_softc *sc, struct mbuf *m,
> > +    const union vxlan_addr *endpoint, uint8_t tos)
> > +{
> > +   struct ip6_hdr *ip6;
> > +   int len = m->m_pkthdr.len;
> >  
> > -           /* Multicast mode is currently not supported for IPv6 */
> > -           return (EAFNOSUPPORT);
> > +   m = m_prepend(m, sizeof(*ip6), M_DONTWAIT);
> > +   if (m == NULL)
> > +           return (NULL);
> > +
> > +   ip6 = mtod(m, struct ip6_hdr *);
> > +   ip6->ip6_flow = ISSET(m->m_pkthdr.csum_flags, M_FLOWID) ?
> > +       htonl(m->m_pkthdr.ph_flowid) : 0;
> > +   ip6->ip6_vfc |= IPV6_VERSION;
> > +   ip6->ip6_flow |= htonl((uint32_t)tos << 20);
> > +   ip6->ip6_plen = htons(len);
> > +   ip6->ip6_nxt = IPPROTO_UDP;
> > +   ip6->ip6_hlim = sc->sc_ttl;
> > +   ip6->ip6_src = sc->sc_src.in6;
> > +   ip6->ip6_dst = endpoint->in6;
> > +
> > +   if (sc->sc_df)
> > +           SET(m->m_pkthdr.csum_flags, M_IPV6_DF_OUT);
> > +
> > +   return (m);
> > +}
> >  #endif /* INET6 */
> > -   default:
> > -           return (EAFNOSUPPORT);
> > +
> > +static int
> > +vxlan_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
> > +    struct rtentry *rt)
> > +{
> > +        struct m_tag *mtag;
> > +        int error = 0;
> > +
> > +   mtag = NULL;
> > +   while ((mtag = m_tag_find(m, PACKET_TAG_GRE, mtag)) != NULL) {
> > +           if (memcmp((caddr_t)(mtag + 1), &ifp->if_index,
> > +               sizeof(ifp->if_index)) == 0) {
> > +                   error = EIO;
> > +                   goto drop;
> > +           }
> >     }
> >  
> > -   src4 = satosin(src);
> > -   dst4 = satosin(dst);
> > +   mtag = m_tag_get(PACKET_TAG_GRE, sizeof(ifp->if_index), M_NOWAIT);
> > +   if (mtag == NULL) {
> > +           error = ENOBUFS;
> > +           goto drop;
> > +   }
> > +   memcpy((caddr_t)(mtag + 1), &ifp->if_index, sizeof(ifp->if_index));
> > +   m_tag_prepend(m, mtag);
> >  
> > -   if (src4->sin_addr.s_addr == INADDR_ANY ||
> > -       IN_MULTICAST(src4->sin_addr.s_addr))
> > -           return (EINVAL);
> > -   if ((ifa = ifa_ifwithaddr(src, sc->sc_rdomain)) == NULL ||
> > -       (mifp = ifa->ifa_ifp) == NULL ||
> > -       (mifp->if_flags & IFF_MULTICAST) == 0)
> > -           return (EADDRNOTAVAIL);
> > +   return (ether_output(ifp, m, dst, rt));
> >  
> > -   if ((imo->imo_membership[0] =
> > -       in_addmulti(&dst4->sin_addr, mifp)) == NULL)
> > -           return (ENOBUFS);
> > +drop:
> > +   m_freem(m);
> > +   return (error);
> > +}
> >  
> > -   imo->imo_num_memberships++;
> > -   imo->imo_ifidx = mifp->if_index;
> > -   if (sc->sc_ttl > 0)
> > -           imo->imo_ttl = sc->sc_ttl;
> > -   else
> > -           imo->imo_ttl = IP_DEFAULT_MULTICAST_TTL;
> > -   imo->imo_loop = 0;
> > +static int
> > +vxlan_enqueue(struct ifnet *ifp, struct mbuf *m)
> > +{
> > +   struct vxlan_softc *sc = ifp->if_softc;
> > +   struct ifqueue *ifq = &ifp->if_snd;
> > +
> > +   if (ifq_enqueue(ifq, m) != 0)
> > +           return (ENOBUFS);
> >  
> > -   /*
> > -    * Use interface hooks to track any changes on the interface
> > -    * that is used to send out the tunnel traffic as multicast.
> > -    */
> > -   if_addrhook_add(mifp, &sc->sc_atask);
> > -   if_linkstatehook_add(mifp, &sc->sc_ltask);
> > -   if_detachhook_add(mifp, &sc->sc_dtask);
> > +   task_add(ifq->ifq_softnet, &sc->sc_send_task);
> >  
> >     return (0);
> >  }
> >  
> > -void
> > -vxlanstart(struct ifnet *ifp)
> > +static void
> > +vxlan_start(struct ifqueue *ifq)
> >  {
> > -   struct vxlan_softc      *sc = (struct vxlan_softc *)ifp->if_softc;
> > +        struct ifnet *ifp = ifq->ifq_if;
> > +        struct vxlan_softc *sc = ifp->if_softc;
> >  
> > -   task_add(net_tq(ifp->if_index), &sc->sc_sendtask);
> > +   task_add(ifq->ifq_softnet, &sc->sc_send_task);
> >  }
> >  
> > -void
> > -vxlan_send_dispatch(void *xsc)
> > +static uint64_t
> > +vxlan_send_ipv4(struct vxlan_softc *sc, struct mbuf_list *ml)
> >  {
> > -   struct vxlan_softc      *sc = xsc;
> > -   struct ifnet            *ifp = &sc->sc_ac.ac_if;
> > -   struct mbuf             *m;
> > -   struct mbuf_list         ml;
> > -
> > -   ml_init(&ml);
> > -   for (;;) {
> > -           m = ifq_dequeue(&ifp->if_snd);
> > -           if (m == NULL)
> > -                   break;
> > -
> > -#if NBPFILTER > 0
> > -           if (ifp->if_bpf)
> > -                   bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
> > -#endif
> > -
> > -           ml_enqueue(&ml, m);
> > -   }
> > -
> > -   if (ml_empty(&ml))
> > -           return;
> > +   struct ip_moptions imo;
> > +   struct mbuf *m;
> > +   uint64_t oerrors = 0;
> > +
> > +   imo.imo_ifidx = sc->sc_if_index0;
> > +   imo.imo_ttl = sc->sc_ttl;
> > +   imo.imo_loop = 0;
> >  
> >     NET_LOCK();
> > -   while ((m = ml_dequeue(&ml)) != NULL) {
> > -           vxlan_output(ifp, m);
> > +   while ((m = ml_dequeue(ml)) != NULL) {
> > +           if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &imo, NULL, 0) != 0)
> > +                   oerrors++;
> >     }
> >     NET_UNLOCK();
> > +
> > +   return (oerrors);
> >  }
> >  
> > +#ifdef INET6
> > +static uint64_t
> > +vxlan_send_ipv6(struct vxlan_softc *sc, struct mbuf_list *ml)
> > +{
> > +   struct ip6_moptions im6o;
> > +   struct mbuf *m;
> > +   uint64_t oerrors = 0;
> > +
> > +   im6o.im6o_ifidx = sc->sc_if_index0;
> > +   im6o.im6o_hlim = sc->sc_ttl;
> > +   im6o.im6o_loop = 0;
> > +
> > +   NET_LOCK();
> > +   while ((m = ml_dequeue(ml)) != NULL) {
> > +           if (ip6_output(m, NULL, NULL, 0, &im6o, NULL) != 0)
> > +                   oerrors++;
> > +        }
> > +        NET_UNLOCK();
> > +
> > +        return (oerrors);
> > +}
> > +#endif /* INET6 */
> >  
> > -int
> > -vxlan_config(struct ifnet *ifp, struct sockaddr *src, struct sockaddr *dst)
> > +static void
> > +vxlan_send(void *arg)
> >  {
> > -   struct vxlan_softc      *sc = (struct vxlan_softc *)ifp->if_softc;
> > -   int                      reset = 0, error, af;
> > -   socklen_t                slen;
> > -   in_port_t                port;
> > -   struct vxlan_taghash    *tagh;
> > -
> > -   if (src != NULL && dst != NULL) {
> > -           if ((af = src->sa_family) != dst->sa_family)
> > -                   return (EAFNOSUPPORT);
> > -   } else {
> > -           /* Reset current configuration */
> > -           af = sc->sc_src.ss_family;
> > -           src = sstosa(&sc->sc_src);
> > -           dst = sstosa(&sc->sc_dst);
> > -           reset = 1;
> > -   }
> > +        struct vxlan_softc *sc = arg;
> > +        struct ifnet *ifp = &sc->sc_ac.ac_if;
> > +   struct mbuf *(*ip_encap)(struct vxlan_softc *, struct mbuf *,
> > +       const union vxlan_addr *, uint8_t);
> > +   uint64_t (*ip_send)(struct vxlan_softc *, struct mbuf_list *);
> > +   struct mbuf_list ml = MBUF_LIST_INITIALIZER();
> > +   struct mbuf *m;
> > +   uint64_t oerrors;
> > +
> > +   if (!ISSET(ifp->if_flags, IFF_RUNNING))
> > +           return;
> >  
> > -   switch (af) {
> > +   switch (sc->sc_af) {
> >     case AF_INET:
> > -           slen = sizeof(struct sockaddr_in);
> > +           ip_encap = vxlan_encap_ipv4;
> > +           ip_send = vxlan_send_ipv4;
> >             break;
> >  #ifdef INET6
> >     case AF_INET6:
> > -           slen = sizeof(struct sockaddr_in6);
> > +           ip_encap = vxlan_encap_ipv6;
> > +           ip_send = vxlan_send_ipv6;
> >             break;
> > -#endif /* INET6 */
> > +#endif
> >     default:
> > -           return (EAFNOSUPPORT);
> > +           unhandled_af(sc->sc_af);
> > +           /* NOTREACHED */
> >     }
> >  
> > -   if (src->sa_len != slen || dst->sa_len != slen)
> > -           return (EINVAL);
> > +   while ((m = ifq_dequeue(&ifp->if_snd)) != NULL) {
> > +#if NBPFILTER > 0
> > +           caddr_t if_bpf = READ_ONCE(ifp->if_bpf);
> > +           if (if_bpf != NULL)
> > +                   bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_OUT);
> > +#endif
> > +           m = vxlan_encap(sc, m, ip_encap);
> > +           if (m == NULL)
> > +                   continue;
> >  
> > -   vxlan_multicast_cleanup(ifp);
> > +           ml_enqueue(&ml, m);
> > +   }
> >  
> > -   /* returns without error if multicast is not configured */
> > -   if ((error = vxlan_multicast_join(ifp, src, dst)) != 0)
> > -           return (error);
> > +   oerrors = (*ip_send)(sc, &ml);
> > +
> > +   counters_add(ifp->if_counters, ifc_oerrors, oerrors);
> > +}
> > +
> > +static struct mbuf *
> > +vxlan_input(void *arg, struct mbuf *m, struct ip *ip, struct ip6_hdr *ip6,
> > +    void *uhp, int hlen)
> > +{
> > +   struct vxlan_tep *vt = arg;
> > +   union vxlan_addr addr;
> > +   struct vxlan_peer key, *p;
> > +   struct udphdr *uh;
> > +   struct vxlan_header *vh;
> > +   struct ether_header *eh;
> > +   int vhlen = hlen + sizeof(*vh);
> > +   struct mbuf *n;
> > +   int off;
> > +   in_port_t port;
> > +   struct vxlan_softc *sc = NULL;
> > +   struct ifnet *ifp;
> > +
> > +   if (m->m_pkthdr.len < vhlen)
> > +           goto drop;
> > +
> > +   uh = uhp;
> > +   port = uh->uh_sport;
> >  
> > -   if ((port = vxlan_sockaddr_port(dst)) != 0)
> > -           sc->sc_dstport = port;
> > +   if (ip != NULL)
> > +           addr.in4 = ip->ip_src;
> > +#ifdef INET6
> > +   else
> > +           addr.in6 = ip6->ip6_src;
> > +#endif
> >  
> > -   if (!reset) {
> > -           bzero(&sc->sc_src, sizeof(sc->sc_src));
> > -           bzero(&sc->sc_dst, sizeof(sc->sc_dst));
> > -           memcpy(&sc->sc_src, src, src->sa_len);
> > -           memcpy(&sc->sc_dst, dst, dst->sa_len);
> > +   if (m->m_len < vhlen) {
> > +           m = m_pullup(m, vhlen);
> > +           if (m == NULL)
> > +                   return (NULL);
> >     }
> >  
> > -   if (sc->sc_vnetid == VXLAN_VNI_ANY) {
> > -           /*
> > -            * If the interface accepts any VNI, put it into a separate
> > -            * list that is not part of the main hash.
> > -            */
> > -           tagh = &vxlan_any;
> > -   } else
> > -           tagh = &vxlan_tagh[VXLAN_TAGHASH(sc->sc_vnetid)];
> > +   /* can't use ip/ip6/uh after this */
> >  
> > -   LIST_REMOVE(sc, sc_entry);
> > -   LIST_INSERT_HEAD(tagh, sc, sc_entry);
> > +   vh = (struct vxlan_header *)(mtod(m, caddr_t) + hlen);
> >  
> > -   return (0);
> > +   memset(&key, 0, sizeof(key));
> > +   key.p_addr = addr;
> > +   key.p_header.vxlan_flags = vh->vxlan_flags & htonl(VXLAN_F_I);
> > +   key.p_header.vxlan_id = vh->vxlan_id & htonl(VXLAN_VNI_MASK);
> > +
> > +   mtx_enter(&vt->vt_mtx);
> > +   p = RBT_FIND(vxlan_peers, &vt->vt_peers, &key);
> > +   if (p == NULL) {
> > +           memset(&key.p_addr, 0, sizeof(key.p_addr));
> > +           p = RBT_FIND(vxlan_peers, &vt->vt_peers, &key);
> > +   }
> > +   if (p != NULL) 
> > +           sc = vxlan_take(p->p_sc);
> > +   mtx_leave(&vt->vt_mtx);
> > +
> > +   if (sc == NULL)
> > +           goto drop;
> > +
> > +   ifp = &sc->sc_ac.ac_if;
> > +   if (ISSET(ifp->if_flags, IFF_LINK0) && port != sc->sc_port)
> > +           goto rele_drop;
> > +
> > +   m_adj(m, vhlen);
> > +
> > +   if (m->m_pkthdr.len < sizeof(*eh))
> > +           goto rele_drop;
> > +
> > +   if (m->m_len < sizeof(*eh)) {
> > +           m = m_pullup(m, sizeof(*eh));
> > +           if (m == NULL)
> > +                   goto rele;
> > +   }
> > +
> > +   n = m_getptr(m, sizeof(*eh), &off);
> > +   if (n == NULL)
> > +           goto rele_drop;
> > +
> > +   if (!ALIGNED_POINTER(mtod(n, caddr_t) + off, uint32_t)) {
> > +           n = m_dup_pkt(m, ETHER_ALIGN, M_NOWAIT);
> > +           m_freem(m);
> > +           if (n == NULL)
> > +                   goto rele;
> > +           m = n;
> > +   }
> > +
> > +   if (sc->sc_mode == VXLAN_TMODE_LEARNING) {
> > +           eh = mtod(m, struct ether_header *);
> > +           etherbridge_map_ea(&sc->sc_eb, &addr,
> > +               (struct ether_addr *)eh->ether_shost);
> > +   }
> > +
> > +   /* XXX prio */
> > +
> > +   if_vinput(ifp, m);
> > +rele:
> > +   vxlan_rele(sc);
> > +   return (NULL);
> > +
> > +rele_drop:
> > +   vxlan_rele(sc);
> > +drop:
> > +   m_freem(m);
> > +   return (NULL);
> >  }
> >  
> > -int
> > -vxlanioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
> > +static int
> > +vxlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
> >  {
> > -   struct vxlan_softc      *sc = (struct vxlan_softc *)ifp->if_softc;
> > -   struct ifreq            *ifr = (struct ifreq *)data;
> > -   struct if_laddrreq      *lifr = (struct if_laddrreq *)data;
> > -   int                      error = 0;
> > +   struct vxlan_softc *sc = ifp->if_softc;
> > +   struct ifreq *ifr = (struct ifreq *)data;
> > +   struct ifbrparam *bparam = (struct ifbrparam *)data;
> > +   int error = 0;
> >  
> >     switch (cmd) {
> >     case SIOCSIFADDR:
> > -           ifp->if_flags |= IFF_UP;
> > -           /* FALLTHROUGH */
> > -
> > +           break;
> >     case SIOCSIFFLAGS:
> > -           if (ifp->if_flags & IFF_UP) {
> > -                   ifp->if_flags |= IFF_RUNNING;
> > +           if (ISSET(ifp->if_flags, IFF_UP)) {
> > +                   if (!ISSET(ifp->if_flags, IFF_RUNNING))
> > +                           error = vxlan_up(sc);
> > +                   else
> > +                           error = 0;
> >             } else {
> > -                   ifp->if_flags &= ~IFF_RUNNING;
> > +                   if (ISSET(ifp->if_flags, IFF_RUNNING))
> > +                           error = vxlan_down(sc);
> >             }
> >             break;
> >  
> > -   case SIOCADDMULTI:
> > -   case SIOCDELMULTI:
> > +   case SIOCSLIFPHYRTABLE:
> > +           error = vxlan_set_rdomain(sc, ifr);
> >             break;
> > -
> > -   case SIOCGIFMEDIA:
> > -   case SIOCSIFMEDIA:
> > -           error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd);
> > +   case SIOCGLIFPHYRTABLE:
> > +           error = vxlan_get_rdomain(sc, ifr);
> >             break;
> >  
> >     case SIOCSLIFPHYADDR:
> > -           error = vxlan_config(ifp,
> > -               sstosa(&lifr->addr),
> > -               sstosa(&lifr->dstaddr));
> > +           error = vxlan_set_tunnel(sc, (const struct if_laddrreq *)data);
> > +           break;
> > +   case SIOCGLIFPHYADDR:
> > +           error = vxlan_get_tunnel(sc, (struct if_laddrreq *)data);
> >             break;
> > -
> >     case SIOCDIFPHYADDR:
> > -           vxlan_multicast_cleanup(ifp);
> > -           bzero(&sc->sc_src, sizeof(sc->sc_src));
> > -           bzero(&sc->sc_dst, sizeof(sc->sc_dst));
> > -           sc->sc_dstport = htons(VXLAN_PORT);
> > +           error = vxlan_del_tunnel(sc);
> >             break;
> >  
> > -   case SIOCGLIFPHYADDR:
> > -           if (sc->sc_dst.ss_family == AF_UNSPEC) {
> > -                   error = EADDRNOTAVAIL;
> > -                   break;
> > -           }
> > -           bzero(&lifr->addr, sizeof(lifr->addr));
> > -           bzero(&lifr->dstaddr, sizeof(lifr->dstaddr));
> > -           memcpy(&lifr->addr, &sc->sc_src, sc->sc_src.ss_len);
> > -           memcpy(&lifr->dstaddr, &sc->sc_dst, sc->sc_dst.ss_len);
> > +   case SIOCSVNETID:
> > +           error = vxlan_set_vnetid(sc, ifr);
> >             break;
> > -
> > -   case SIOCSLIFPHYRTABLE:
> > -           if (ifr->ifr_rdomainid < 0 ||
> > -               ifr->ifr_rdomainid > RT_TABLEID_MAX ||
> > -               !rtable_exists(ifr->ifr_rdomainid)) {
> > -                   error = EINVAL;
> > -                   break;
> > -           }
> > -           sc->sc_rdomain = ifr->ifr_rdomainid;
> > -           (void)vxlan_config(ifp, NULL, NULL);
> > +   case SIOCGVNETID:
> > +           error = vxlan_get_vnetid(sc, ifr);
> > +           break;
> > +   case SIOCDVNETID:
> > +           error = vxlan_del_vnetid(sc);
> >             break;
> >  
> > -   case SIOCGLIFPHYRTABLE:
> > -           ifr->ifr_rdomainid = sc->sc_rdomain;
> > +   case SIOCSIFPARENT:
> > +           error = vxlan_set_parent(sc, (struct if_parent *)data);
> > +           break;
> > +   case SIOCGIFPARENT:
> > +           error = vxlan_get_parent(sc, (struct if_parent *)data);
> > +           break;
> > +   case SIOCDIFPARENT:
> > +           error = vxlan_del_parent(sc);
> >             break;
> >  
> > -   case SIOCSLIFPHYTTL:
> > -           if (ifr->ifr_ttl < 0 || ifr->ifr_ttl > 0xff) {
> > -                   error = EINVAL;
> > +   case SIOCSTXHPRIO:
> > +           error = if_txhprio_l2_check(ifr->ifr_hdrprio);
> > +           if (error != 0)
> >                     break;
> > -           }
> > -           if (sc->sc_ttl == (u_int8_t)ifr->ifr_ttl)
> > -                   break;
> > -           sc->sc_ttl = (u_int8_t)(ifr->ifr_ttl);
> > -           (void)vxlan_config(ifp, NULL, NULL);
> > +
> > +           sc->sc_txhprio = ifr->ifr_hdrprio;
> > +           break;
> > +   case SIOCGTXHPRIO:
> > +           ifr->ifr_hdrprio = sc->sc_txhprio;
> >             break;
> >  
> > -   case SIOCGLIFPHYTTL:
> > -           ifr->ifr_ttl = (int)sc->sc_ttl;
> > +   case SIOCSRXHPRIO:
> > +           error = if_rxhprio_l2_check(ifr->ifr_hdrprio);
> > +           if (error != 0)
> > +                   break;
> > +
> > +           sc->sc_rxhprio = ifr->ifr_hdrprio;
> > +           break;
> > +   case SIOCGRXHPRIO:
> > +           ifr->ifr_hdrprio = sc->sc_rxhprio;
> >             break;
> >  
> >     case SIOCSLIFPHYDF:
> > @@ -496,50 +802,45 @@ vxlanioctl(struct ifnet *ifp, u_long cmd
> >             ifr->ifr_df = sc->sc_df ? 1 : 0;
> >             break;
> >  
> > -   case SIOCSTXHPRIO:
> > -           if (ifr->ifr_hdrprio == IF_HDRPRIO_PACKET)
> > -                   ; /* fall through */
> > -           else if (ifr->ifr_hdrprio < IF_HDRPRIO_MIN ||
> > -               ifr->ifr_hdrprio > IF_HDRPRIO_MAX) {
> > +   case SIOCSLIFPHYTTL:
> > +           if (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff) {
> >                     error = EINVAL;
> >                     break;
> >             }
> >  
> > -           sc->sc_txhprio = ifr->ifr_hdrprio;
> > +           /* commit */
> > +           sc->sc_ttl = (uint8_t)ifr->ifr_ttl;
> >             break;
> > -   case SIOCGTXHPRIO:
> > -           ifr->ifr_hdrprio = sc->sc_txhprio;
> > +   case SIOCGLIFPHYTTL:
> > +           ifr->ifr_ttl = (int)sc->sc_ttl;
> >             break;
> >  
> > -   case SIOCSVNETID:
> > -           if (sc->sc_vnetid == ifr->ifr_vnetid)
> > -                   break;
> > -
> > -           if ((ifr->ifr_vnetid != VXLAN_VNI_ANY) &&
> > -               (ifr->ifr_vnetid > VXLAN_VNI_MAX ||
> > -                ifr->ifr_vnetid < VXLAN_VNI_MIN)) {
> > -                   error = EINVAL;
> > -                   break;
> > -           }
> > -
> > -           sc->sc_vnetid = (int)ifr->ifr_vnetid;
> > -           (void)vxlan_config(ifp, NULL, NULL);
> > +   case SIOCBRDGSCACHE:
> > +           error = etherbridge_set_max(&sc->sc_eb, bparam);
> >             break;
> > -
> > -   case SIOCGVNETID:
> > -           if ((sc->sc_vnetid != VXLAN_VNI_ANY) &&
> > -               (sc->sc_vnetid > VXLAN_VNI_MAX ||
> > -                sc->sc_vnetid < VXLAN_VNI_MIN)) {
> > -                   error = EADDRNOTAVAIL;
> > -                   break;
> > -           }
> > -
> > -           ifr->ifr_vnetid = sc->sc_vnetid;
> > +   case SIOCBRDGGCACHE:
> > +           error = etherbridge_get_max(&sc->sc_eb, bparam);
> > +           break;
> > +   case SIOCBRDGSTO:
> > +           error = etherbridge_set_tmo(&sc->sc_eb, bparam);
> > +           break;
> > +   case SIOCBRDGGTO:
> > +           error = etherbridge_get_tmo(&sc->sc_eb, bparam);
> >             break;
> >  
> > -   case SIOCDVNETID:
> > -           sc->sc_vnetid = VXLAN_VNI_UNSET;
> > -           (void)vxlan_config(ifp, NULL, NULL);
> > +   case SIOCBRDGRTS:
> > +           error = etherbridge_rtfind(&sc->sc_eb,
> > +               (struct ifbaconf *)data);
> > +           break;
> > +   case SIOCBRDGFLUSH:
> > +           etherbridge_flush(&sc->sc_eb,
> > +               ((struct ifbreq *)data)->ifbr_ifsflags);
> > +           break;
> > +   case SIOCBRDGSADDR:
> > +           error = vxlan_add_addr(sc, (struct ifbareq *)data);
> > +           break;
> > +   case SIOCBRDGDADDR:
> > +           error = vxlan_del_addr(sc, (struct ifbareq *)data);
> >             break;
> >  
> >     default:
> > @@ -550,465 +851,960 @@ vxlanioctl(struct ifnet *ifp, u_long cmd
> >     return (error);
> >  }
> >  
> > -int
> > -vxlan_media_change(struct ifnet *ifp)
> > +static struct vxlan_tep *
> > +vxlan_tep_get(struct vxlan_softc *sc, const union vxlan_addr *addr)
> >  {
> > -   return (0);
> > -}
> > +   struct vxlan_tep *vt;
> >  
> > -void
> > -vxlan_media_status(struct ifnet *ifp, struct ifmediareq *imr)
> > -{
> > -   imr->ifm_status = IFM_AVALID | IFM_ACTIVE;
> > +   TAILQ_FOREACH(vt, &vxlan_teps, vt_entry) {
> > +           if (sc->sc_af == vt->vt_af &&
> > +               sc->sc_rdomain == vt->vt_rdomain &&
> > +               memcmp(addr, &vt->vt_addr, sizeof(*addr)) == 0 &&
> > +               sc->sc_port == vt->vt_port)
> > +                   return (vt);
> > +   }
> > +
> > +   return (NULL);
> >  }
> >  
> > -int
> > -vxlan_sockaddr_cmp(struct sockaddr *srcsa, struct sockaddr *dstsa)
> > +static int
> > +vxlan_tep_add_addr(struct vxlan_softc *sc, const union vxlan_addr *addr,
> > +    struct vxlan_peer *p)
> >  {
> > -   struct sockaddr_in      *src4, *dst4;
> > +   struct mbuf m;
> > +   struct vxlan_tep *vt;
> > +   struct socket *so;
> > +   struct sockaddr_in *sin;
> >  #ifdef INET6
> > -   struct sockaddr_in6     *src6, *dst6;
> > -#endif /* INET6 */
> > +   struct sockaddr_in6 *sin6;
> > +#endif
> > +   int error;
> > +   int s;
> >  
> > -   if (srcsa->sa_family != dstsa->sa_family)
> > -           return (1);
> > +   vt = vxlan_tep_get(sc, addr);
> > +   if (vt != NULL) {
> > +           struct vxlan_peer *op;
> > +
> > +           mtx_enter(&vt->vt_mtx);
> > +           op = RBT_INSERT(vxlan_peers, &vt->vt_peers, p);
> > +           mtx_leave(&vt->vt_mtx);
> > +
> > +           if (op != NULL)
> > +                   return (EADDRINUSE);
> > +
> > +           return (0);
> > +   }
> >  
> > -   switch (dstsa->sa_family) {
> > +   vt = malloc(sizeof(*vt), M_DEVBUF, M_NOWAIT|M_ZERO);
> > +   if (vt == NULL)
> > +           return (ENOMEM);
> > +
> > +   vt->vt_af = sc->sc_af;
> > +   vt->vt_rdomain = sc->sc_rdomain;
> > +   vt->vt_addr = *addr;
> > +   vt->vt_port = sc->sc_port;
> > +
> > +   mtx_init(&vt->vt_mtx, IPL_SOFTNET);
> > +   RBT_INIT(vxlan_peers, &vt->vt_peers);
> > +   RBT_INSERT(vxlan_peers, &vt->vt_peers, p);
> > +
> > +   error = socreate(vt->vt_af, &so, SOCK_DGRAM, IPPROTO_UDP);
> > +   if (error != 0)
> > +           goto free;
> > +
> > +   s = solock(so);
> > +
> > +   sotoinpcb(so)->inp_upcall = vxlan_input;
> > +   sotoinpcb(so)->inp_upcall_arg = vt;
> > +
> > +   m_inithdr(&m);
> > +   m.m_len = sizeof(vt->vt_rdomain);
> > +   *mtod(&m, unsigned int *) = vt->vt_rdomain;
> > +   error = sosetopt(so, SOL_SOCKET, SO_RTABLE, &m);
> > +   if (error != 0)
> > +           goto close;
> > +
> > +   m_inithdr(&m);
> > +   switch (vt->vt_af) {
> >     case AF_INET:
> > -           src4 = satosin(srcsa);
> > -           dst4 = satosin(dstsa);
> > -           if (src4->sin_addr.s_addr == dst4->sin_addr.s_addr)
> > -                   return (0);
> > +           sin = mtod(&m, struct sockaddr_in *);
> > +           memset(sin, 0, sizeof(*sin));
> > +           sin->sin_len = sizeof(*sin);
> > +           sin->sin_family = AF_INET;
> > +           sin->sin_addr = addr->in4;
> > +           sin->sin_port = vt->vt_port;
> > +
> > +           m.m_len = sizeof(*sin);
> >             break;
> > +
> >  #ifdef INET6
> >     case AF_INET6:
> > -           src6 = satosin6(srcsa);
> > -           dst6 = satosin6(dstsa);
> > -           if (IN6_ARE_ADDR_EQUAL(&src6->sin6_addr, &dst6->sin6_addr) &&
> > -               src6->sin6_scope_id == dst6->sin6_scope_id)
> > -                   return (0);
> > +           sin6 = mtod(&m, struct sockaddr_in6 *);
> > +           sin6->sin6_len = sizeof(*sin6);
> > +           sin6->sin6_family = AF_INET6;
> > +           in6_recoverscope(sin6, &addr->in6);
> > +           sin6->sin6_port = sc->sc_port;
> > +
> > +           m.m_len = sizeof(*sin6);
> >             break;
> > -#endif /* INET6 */
> > +#endif
> > +   default:
> > +           unhandled_af(vt->vt_af);
> >     }
> >  
> > -   return (1);
> > +   error = sobind(so, &m, curproc);
> > +   if (error != 0)
> > +           goto close;
> > +
> > +   sounlock(so, s);
> > +
> > +   rw_assert_wrlock(&vxlan_lock);
> > +   TAILQ_INSERT_TAIL(&vxlan_teps, vt, vt_entry);
> > +
> > +   vt->vt_so = so;
> > +
> > +   return (0);
> > +
> > +close:
> > +   sounlock(so, s);
> > +   soclose(so, MSG_DONTWAIT);
> > +free:
> > +   free(vt, M_DEVBUF, sizeof(*vt));
> > +   return (error);
> >  }
> >  
> > -uint16_t
> > -vxlan_sockaddr_port(struct sockaddr *sa)
> > +static void
> > +vxlan_tep_del_addr(struct vxlan_softc *sc, const union vxlan_addr *addr,
> > +    struct vxlan_peer *p)
> >  {
> > -   struct sockaddr_in      *sin4;
> > -#ifdef INET6
> > -   struct sockaddr_in6     *sin6;
> > -#endif /* INET6 */
> > +   struct vxlan_tep *vt;
> > +   int empty;
> >  
> > -   switch (sa->sa_family) {
> > -   case AF_INET:
> > -           sin4 = satosin(sa);
> > -           return (sin4->sin_port);
> > -#ifdef INET6
> > -   case AF_INET6:
> > -           sin6 = satosin6(sa);
> > -           return (sin6->sin6_port);
> > -#endif /* INET6 */
> > -   default:
> > -           break;
> > -   }
> > +   vt = vxlan_tep_get(sc, addr);
> > +   if (vt == NULL)
> > +           panic("unable to find vxlan_tep for peer %p (sc %p)", p, sc);
> > +
> > +   mtx_enter(&vt->vt_mtx);
> > +   RBT_REMOVE(vxlan_peers, &vt->vt_peers, p);
> > +   empty = RBT_EMPTY(vxlan_peers, &vt->vt_peers);
> > +   mtx_leave(&vt->vt_mtx);
> >  
> > -   return (0);
> > +   if (!empty)
> > +           return;
> > +
> > +   rw_assert_wrlock(&vxlan_lock);
> > +   TAILQ_REMOVE(&vxlan_teps, vt, vt_entry);
> > +
> > +   soclose(vt->vt_so, MSG_DONTWAIT);
> > +   free(vt, M_DEVBUF, sizeof(*vt));
> >  }
> >  
> > -int
> > -vxlan_lookup(struct mbuf *m, struct udphdr *uh, int iphlen,
> > -    struct sockaddr *srcsa, struct sockaddr *dstsa)
> > -{
> > -   struct vxlan_softc      *sc = NULL, *sc_cand = NULL;
> > -   struct vxlan_header      v;
> > -   int                      vni;
> > -   struct ifnet            *ifp;
> > -   int                      skip;
> > -#if NBRIDGE > 0
> > -   struct bridge_tunneltag *brtag;
> > -#endif
> > -   struct mbuf             *n;
> > -   int                      off;
> > -
> > -   /* XXX Should verify the UDP port first before copying the packet */
> > -   skip = iphlen + sizeof(*uh);
> > -   if (m->m_pkthdr.len - skip < sizeof(v))
> > -           return (0);
> > -   m_copydata(m, skip, sizeof(v), &v);
> > -   skip += sizeof(v);
> > +static int
> > +vxlan_tep_up(struct vxlan_softc *sc)
> > +{
> > +   struct vxlan_peer *up, *mp;
> > +   int error;
> > +
> > +   up = malloc(sizeof(*up), M_DEVBUF, M_NOWAIT|M_ZERO);
> > +   if (up == NULL)
> > +           return (ENOMEM);
> >  
> > -   if (v.vxlan_flags & htonl(VXLAN_RESERVED1) ||
> > -       v.vxlan_id & htonl(VXLAN_RESERVED2))
> > +   if (sc->sc_mode == VXLAN_TMODE_P2P)
> > +           up->p_addr = sc->sc_dst;
> > +   up->p_header = sc->sc_header;
> > +   up->p_sc = vxlan_take(sc);
> > +
> > +   error = vxlan_tep_add_addr(sc, &sc->sc_src, up);
> > +   if (error != 0)
> > +           goto freeup;
> > +
> > +   sc->sc_ucast_peer = up;
> > +
> > +   if (sc->sc_mode != VXLAN_TMODE_LEARNING)
> >             return (0);
> >  
> > -   vni = ntohl(v.vxlan_id) >> VXLAN_VNI_S;
> > -   if ((v.vxlan_flags & htonl(VXLAN_FLAGS_VNI)) == 0) {
> > -           if (vni != 0)
> > -                   return (0);
> > +   mp = malloc(sizeof(*mp), M_DEVBUF, M_NOWAIT|M_ZERO);
> > +   if (mp == NULL) {
> > +           error = ENOMEM;
> > +           goto delup;
> > +   }
> > +
> > +   /* addr is multicast, leave it as 0s */
> > +   mp->p_header = sc->sc_header;
> > +   mp->p_sc = vxlan_take(sc);
> > +
> > +   /* destination address is a multicast group we want to join */
> > +   error = vxlan_tep_add_addr(sc, &sc->sc_dst, up);
> > +   if (error != 0)
> > +           goto freemp;
> > +
> > +   sc->sc_mcast_peer = mp;
> >  
> > -           vni = VXLAN_VNI_UNSET;
> > +   return (0);
> > +
> > +freemp:
> > +   vxlan_rele(mp->p_sc);
> > +   free(mp, M_DEVBUF, sizeof(*mp));
> > +delup:
> > +   vxlan_tep_del_addr(sc, &sc->sc_src, up);
> > +freeup:
> > +   vxlan_rele(up->p_sc);
> > +   free(up, M_DEVBUF, sizeof(*up));
> > +   return (error);
> > +}
> > +
> > +static void
> > +vxlan_tep_down(struct vxlan_softc *sc)
> > +{
> > +   struct vxlan_peer *up = sc->sc_ucast_peer;
> > +
> > +   if (sc->sc_mode == VXLAN_TMODE_LEARNING) {
> > +           struct vxlan_peer *mp = sc->sc_mcast_peer;
> > +           vxlan_tep_del_addr(sc, &sc->sc_dst, mp);
> > +           vxlan_rele(mp->p_sc);
> > +           free(mp, M_DEVBUF, sizeof(*mp));
> >     }
> >  
> > +   vxlan_tep_del_addr(sc, &sc->sc_src, up);
> > +   vxlan_rele(up->p_sc);
> > +   free(up, M_DEVBUF, sizeof(*up));
> > +}
> > +
> > +static int
> > +vxlan_up(struct vxlan_softc *sc)
> > +{
> > +   struct ifnet *ifp = &sc->sc_ac.ac_if;
> > +   struct ifnet *ifp0 = NULL;
> > +   int error;
> > +
> > +   KASSERT(!ISSET(ifp->if_flags, IFF_RUNNING));
> >     NET_ASSERT_LOCKED();
> > -   /* First search for a vxlan(4) interface with the packet's VNI */
> > -   LIST_FOREACH(sc, &vxlan_tagh[VXLAN_TAGHASH(vni)], sc_entry) {
> > -           if ((uh->uh_dport == sc->sc_dstport) &&
> > -               vni == sc->sc_vnetid &&
> > -               sc->sc_rdomain == rtable_l2(m->m_pkthdr.ph_rtableid)) {
> > -                   sc_cand = sc;
> > -                   if (vxlan_sockaddr_cmp(srcsa, sstosa(&sc->sc_dst)) == 0)
> > -                           goto found;
> > -           }
> > +
> > +   if (sc->sc_af == AF_UNSPEC)
> > +           return (EDESTADDRREQ);
> > +   KASSERT(sc->sc_mode != VXLAN_TMODE_UNSET);
> > +
> > +   NET_UNLOCK();
> > +
> > +   error = rw_enter(&vxlan_lock, RW_WRITE|RW_INTR);
> > +   if (error != 0)
> > +           goto netlock;
> > +
> > +   NET_LOCK();
> > +   if (ISSET(ifp->if_flags, IFF_RUNNING)) {
> > +           /* something else beat us */
> > +           rw_exit(&vxlan_lock);
> > +           return (0);
> >     }
> > +   NET_UNLOCK();
> >  
> > -   /*
> > -    * Now loop through all the vxlan(4) interfaces that are configured
> > -    * to accept any VNI and operating in multipoint-to-multipoint mode
> > -    * that is used in combination with bridge(4) or switch(4).
> > -    * If a vxlan(4) interface has been found for the packet's VNI, this
> > -    * code is not reached as the other interface is more specific.
> > -    */
> > -   LIST_FOREACH(sc, &vxlan_any, sc_entry) {
> > -           if ((uh->uh_dport == sc->sc_dstport) &&
> > -               (sc->sc_rdomain == rtable_l2(m->m_pkthdr.ph_rtableid))) {
> > -                   sc_cand = sc;
> > -                   goto found;
> > -           }
> > +   if (sc->sc_mode != VXLAN_TMODE_P2P) {
> > +           error = etherbridge_up(&sc->sc_eb);
> > +           if (error != 0)
> > +                   goto unlock;
> >     }
> >  
> > -   if (sc_cand) {
> > -           sc = sc_cand;
> > -           goto found;
> > +   if (sc->sc_mode == VXLAN_TMODE_LEARNING) {
> > +           ifp0 = if_get(sc->sc_if_index0);
> > +           if (ifp0 == NULL) {
> > +                   error = ENXIO;
> > +                   goto down;
> > +           }
> > +
> > +           /* check again if multicast will work on top of the parent */
> > +           if (!ISSET(ifp0->if_flags, IFF_MULTICAST)) {
> > +                   error = EPROTONOSUPPORT;
> > +                   goto put;
> > +           }
> > +
> > +           error = vxlan_addmulti(sc, ifp0);
> > +           if (error != 0)
> > +                   goto put;
> > +
> > +           /* Register callback if parent wants to unregister */
> > +           if_detachhook_add(ifp0, &sc->sc_dtask);
> > +   } else {
> > +           if (sc->sc_if_index0 != 0) {
> > +                   error = EPROTONOSUPPORT;
> > +                   goto down;
> > +           }
> >     }
> >  
> > -   /* not found */
> > +   error = vxlan_tep_up(sc);
> > +   if (error != 0)
> > +           goto del;
> > +
> > +   if_put(ifp0);
> > +
> > +   NET_LOCK();
> > +   SET(ifp->if_flags, IFF_RUNNING);
> > +   rw_exit(&vxlan_lock);
> > +
> >     return (0);
> >  
> > - found:
> > -   if (m->m_pkthdr.len < skip + sizeof(struct ether_header)) {
> > -           m_freem(m);
> > -           return (EINVAL);
> > +del:
> > +   if (ifp0 != NULL)
> > +           if_detachhook_del(ifp0, &sc->sc_dtask);
> > +   vxlan_delmulti(sc);
> > +put:
> > +   if_put(ifp0);
> > +down:
> > +   if (sc->sc_mode != VXLAN_TMODE_P2P)
> > +           etherbridge_down(&sc->sc_eb);
> > +unlock:
> > +   rw_exit(&vxlan_lock);
> > +netlock:
> > +   NET_LOCK();
> > +
> > +   return (error);
> > +}
> > +
> > +static int
> > +vxlan_down(struct vxlan_softc *sc)
> > +{
> > +   struct ifnet *ifp = &sc->sc_ac.ac_if;
> > +   struct ifnet *ifp0;
> > +   int error;
> > +
> > +   KASSERT(ISSET(ifp->if_flags, IFF_RUNNING));
> > +   NET_UNLOCK();
> > +
> > +   error = rw_enter(&vxlan_lock, RW_WRITE|RW_INTR);
> > +   if (error != 0) {
> > +           NET_LOCK();
> > +           return (error);
> >     }
> >  
> > -   m_adj(m, skip);
> > -   ifp = &sc->sc_ac.ac_if;
> > +   NET_LOCK();
> > +   if (!ISSET(ifp->if_flags, IFF_RUNNING)) {
> > +           /* something else beat us */
> > +           rw_exit(&vxlan_lock);
> > +           return (0);
> > +   }
> > +   NET_UNLOCK();
> >  
> > -#if NBRIDGE > 0
> > -   /* Store the tunnel src/dst IP and vni for the bridge or switch */
> > -   if ((ifp->if_bridgeidx != 0 || ifp->if_switchport != NULL) &&
> > -       srcsa->sa_family != AF_UNSPEC &&
> > -       ((brtag = bridge_tunneltag(m)) != NULL)) {
> > -           memcpy(&brtag->brtag_peer.sa, srcsa, srcsa->sa_len);
> > -           memcpy(&brtag->brtag_local.sa, dstsa, dstsa->sa_len);
> > -           brtag->brtag_id = vni;
> > +   vxlan_tep_down(sc);
> > +
> > +   if (sc->sc_mode == VXLAN_TMODE_LEARNING) {
> > +           vxlan_delmulti(sc);
> > +           ifp0 = if_get(sc->sc_if_index0);
> > +           if (ifp0 != NULL) {
> > +                   if_detachhook_del(ifp0, &sc->sc_dtask);
> > +           }
> > +           if_put(ifp0);
> >     }
> > -#endif
> >  
> > -   m->m_flags &= ~(M_BCAST|M_MCAST);
> > +   if (sc->sc_mode != VXLAN_TMODE_P2P)
> > +           etherbridge_down(&sc->sc_eb);
> >  
> > -#if NPF > 0
> > -   pf_pkt_addr_changed(m);
> > -#endif
> > -   if ((m->m_len < sizeof(struct ether_header)) &&
> > -       (m = m_pullup(m, sizeof(struct ether_header))) == NULL)
> > -           return (ENOBUFS);
> > +   taskq_del_barrier(ifp->if_snd.ifq_softnet, &sc->sc_send_task);
> > +   NET_LOCK();
> > +   CLR(ifp->if_flags, IFF_RUNNING);
> > +   rw_exit(&vxlan_lock);
> >  
> > -   n = m_getptr(m, sizeof(struct ether_header), &off);
> > -   if (n == NULL) {
> > -           m_freem(m);
> > -           return (EINVAL);
> > +   return (0);
> > +}
> > +
> > +static int
> > +vxlan_addmulti(struct vxlan_softc *sc, struct ifnet *ifp0)
> > +{
> > +   int error = 0;
> > +
> > +   NET_LOCK();
> > +
> > +   switch (sc->sc_af) {
> > +   case AF_INET:
> > +           sc->sc_inmulti = in_addmulti(&sc->sc_dst.in4, ifp0);
> > +           if (sc->sc_inmulti == NULL)
> > +                   error = EADDRNOTAVAIL;
> > +           break;
> > +#ifdef INET6
> > +   case AF_INET6:
> > +           sc->sc_inmulti = in6_addmulti(&sc->sc_dst.in6, ifp0, &error);
> > +           break;
> > +#endif
> > +   default:
> > +           unhandled_af(sc->sc_af);
> >     }
> > -   if (!ALIGNED_POINTER(mtod(n, caddr_t) + off, uint32_t)) {
> > -           n = m_dup_pkt(m, ETHER_ALIGN, M_NOWAIT);
> > -           /* Dispose of the original mbuf chain */
> > -           m_freem(m);
> > -           if (n == NULL)
> > -                   return (ENOBUFS);
> > -           m = n;
> > +
> > +   NET_UNLOCK();
> > +
> > +   return (error);
> > +}
> > +
> > +static void
> > +vxlan_delmulti(struct vxlan_softc *sc)
> > +{
> > +   NET_LOCK();
> > +
> > +   switch (sc->sc_af) {
> > +   case AF_INET:
> > +           in_delmulti(sc->sc_inmulti);
> > +           break;
> > +#ifdef INET6
> > +   case AF_INET6:
> > +           in6_delmulti(sc->sc_inmulti);
> > +           break;
> > +#endif
> > +   default:
> > +           unhandled_af(sc->sc_af);
> >     }
> >  
> > -   if_vinput(ifp, m);
> > +   sc->sc_inmulti = NULL; /* keep it tidy */
> >  
> > -   /* success */
> > -   return (1);
> > +   NET_UNLOCK();
> >  }
> >  
> > -struct mbuf *
> > -vxlan_encap4(struct ifnet *ifp, struct mbuf *m,
> > -    struct sockaddr *src, struct sockaddr *dst)
> > -{
> > -   struct vxlan_softc      *sc = (struct vxlan_softc *)ifp->if_softc;
> > -   struct ip               *ip;
> > -
> > -   /*
> > -    * Remove multicast and broadcast flags or encapsulated packet
> > -    * ends up as multicast or broadcast packet.
> > -    */
> > -   m->m_flags &= ~(M_BCAST|M_MCAST);
> > +static int
> > +vxlan_set_rdomain(struct vxlan_softc *sc, const struct ifreq *ifr)
> > +{
> > +   struct ifnet *ifp = &sc->sc_ac.ac_if;
> >  
> > -   M_PREPEND(m, sizeof(*ip), M_DONTWAIT);
> > -   if (m == NULL)
> > -           return (NULL);
> > +   if (ifr->ifr_rdomainid < 0 ||
> > +       ifr->ifr_rdomainid > RT_TABLEID_MAX)
> > +           return (EINVAL);
> > +   if (!rtable_exists(ifr->ifr_rdomainid))
> > +           return (EADDRNOTAVAIL);
> >  
> > -   ip = mtod(m, struct ip *);
> > -   ip->ip_v = IPVERSION;
> > -   ip->ip_hl = sizeof(struct ip) >> 2;
> > -   ip->ip_id = htons(ip_randomid());
> > -   ip->ip_off = sc->sc_df;
> > -   ip->ip_p = IPPROTO_UDP;
> > -   ip->ip_tos = IFQ_PRIO2TOS(sc->sc_txhprio == IF_HDRPRIO_PACKET ?
> > -       m->m_pkthdr.pf.prio : sc->sc_txhprio);
> > -   ip->ip_len = htons(m->m_pkthdr.len);
> > +   if (sc->sc_rdomain == ifr->ifr_rdomainid)
> > +           return (0);
> >  
> > -   ip->ip_src = satosin(src)->sin_addr;
> > -   ip->ip_dst = satosin(dst)->sin_addr;
> > +   if (!ISSET(ifp->if_flags, IFF_RUNNING))
> > +           return (EBUSY);
> >  
> > -   if (sc->sc_ttl > 0)
> > -           ip->ip_ttl = sc->sc_ttl;
> > -   else
> > -           ip->ip_ttl = IPDEFTTL;
> > +   /* commit */
> > +   sc->sc_rdomain = ifr->ifr_rdomainid;
> > +   etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);
> >  
> > -   return (m);
> > +   return (0);
> > +}
> > +
> > +static int
> > +vxlan_get_rdomain(struct vxlan_softc *sc, struct ifreq *ifr)
> > +{
> > +   ifr->ifr_rdomainid = sc->sc_rdomain;
> > +
> > +   return (0);
> >  }
> >  
> > +static int
> > +vxlan_set_tunnel(struct vxlan_softc *sc, const struct if_laddrreq *req)
> > +{
> > +   struct ifnet *ifp = &sc->sc_ac.ac_if;
> > +   struct sockaddr *src = (struct sockaddr *)&req->addr;
> > +   struct sockaddr *dst = (struct sockaddr *)&req->dstaddr;
> > +   struct sockaddr_in *src4, *dst4;
> >  #ifdef INET6
> > -struct mbuf *
> > -vxlan_encap6(struct ifnet *ifp, struct mbuf *m,
> > -    struct sockaddr *src, struct sockaddr *dst)
> > -{
> > -   struct vxlan_softc      *sc = (struct vxlan_softc *)ifp->if_softc;
> > -   struct ip6_hdr          *ip6;
> > -   struct in6_addr         *in6a;
> > -   uint32_t                 flow;
> > -
> > -   /*
> > -    * Remove multicast and broadcast flags or encapsulated packet
> > -    * ends up as multicast or broadcast packet.
> > -    */
> > -   m->m_flags &= ~(M_BCAST|M_MCAST);
> > +   struct sockaddr_in6 *src6, *dst6;
> > +   int error;
> > +#endif
> > +   union vxlan_addr saddr, daddr;
> > +   unsigned int mode = VXLAN_TMODE_ENDPOINT;
> > +   in_port_t port = htons(VXLAN_PORT);
> >  
> > -   M_PREPEND(m, sizeof(struct ip6_hdr), M_DONTWAIT);
> > -   if (m == NULL)
> > -           return (NULL);
> > +   memset(&saddr, 0, sizeof(saddr));
> > +   memset(&daddr, 0, sizeof(daddr));
> >  
> > -   flow = (uint32_t)IFQ_PRIO2TOS(sc->sc_txhprio == IF_HDRPRIO_PACKET ?
> > -       m->m_pkthdr.pf.prio : sc->sc_txhprio) << 20;
> > +   /* validate */
> > +   switch (src->sa_family) {
> > +   case AF_INET:
> > +           src4 = (struct sockaddr_in *)src;
> > +           if (in_nullhost(src4->sin_addr) ||
> > +               IN_MULTICAST(src4->sin_addr.s_addr))
> > +                   return (EINVAL);
> >  
> > -   ip6 = mtod(m, struct ip6_hdr *);
> > -   ip6->ip6_flow = htonl(flow);
> > -   ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
> > -   ip6->ip6_vfc |= IPV6_VERSION;
> > -   ip6->ip6_nxt = IPPROTO_UDP;
> > -   ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(struct ip6_hdr));
> > -   if (in6_embedscope(&ip6->ip6_src, satosin6(src), NULL) != 0)
> > -           goto drop;
> > -   if (in6_embedscope(&ip6->ip6_dst, satosin6(dst), NULL) != 0)
> > -           goto drop;
> > +           if (src4->sin_port != htons(0))
> > +                   port = src4->sin_port;
> >  
> > -   if (sc->sc_ttl > 0)
> > -           ip6->ip6_hlim = sc->sc_ttl;
> > -   else
> > -           ip6->ip6_hlim = ip6_defhlim;
> > +           if (dst->sa_family != AF_UNSPEC) {
> > +                   if (dst->sa_family != AF_INET)
> > +                           return (EINVAL);
> > +
> > +                   dst4 = (struct sockaddr_in *)dst;
> > +                   if (in_nullhost(dst4->sin_addr))
> > +                           return (EINVAL);
> > +
> > +                   /* all good */
> > +                   mode = IN_MULTICAST(dst4->sin_addr.s_addr) ?
> > +                       VXLAN_TMODE_LEARNING : VXLAN_TMODE_P2P;
> > +                   daddr.in4 = dst4->sin_addr;
> > +           }
> >  
> > -   if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(src)->sin6_addr)) {
> > -           if (in6_selectsrc(&in6a, satosin6(dst), NULL,
> > -               sc->sc_rdomain) != 0)
> > -                   goto drop;
> > +           saddr.in4 = src4->sin_addr;
> > +           break;
> > +
> > +#ifdef INET6
> > +   case AF_INET6:
> > +           src6 = (struct sockaddr_in6 *)src;
> > +           if (IN6_IS_ADDR_UNSPECIFIED(&src6->sin6_addr) ||
> > +               IN6_IS_ADDR_MULTICAST(&src6->sin6_addr))
> > +                   return (EINVAL);
> >  
> > -           ip6->ip6_src = *in6a;
> > +           if (src6->sin6_port != htons(0))
> > +                   port = src6->sin6_port;
> > +
> > +           if (dst->sa_family != AF_UNSPEC) {
> > +                   if (dst->sa_family != AF_INET6)
> > +                           return (EINVAL);
> > +
> > +                   dst6 = (struct sockaddr_in6 *)dst;
> > +                   if (IN6_IS_ADDR_UNSPECIFIED(&dst6->sin6_addr))
> > +                           return (EINVAL);
> > +
> > +                   if (src6->sin6_scope_id != dst6->sin6_scope_id)
> > +                           return (EINVAL);
> > +
> > +                   /* all good */
> > +                   mode = IN6_IS_ADDR_MULTICAST(&dst6->sin6_addr) ?
> > +                       VXLAN_TMODE_LEARNING : VXLAN_TMODE_P2P;
> > +                   error = in6_embedscope(&daddr.in6, dst6, NULL);
> > +                   if (error != 0)
> > +                           return (error);
> > +           }
> > +
> > +           error = in6_embedscope(&saddr.in6, src6, NULL);
> > +           if (error != 0)
> > +                   return (error);
> > +
> > +           break;
> > +#endif
> > +   default:
> > +           return (EAFNOSUPPORT);
> >     }
> >  
> > -   if (sc->sc_df)
> > -           SET(m->m_pkthdr.csum_flags, M_IPV6_DF_OUT);
> > +   if (memcmp(&sc->sc_src, &saddr, sizeof(sc->sc_src)) == 0 &&
> > +       memcmp(&sc->sc_dst, &daddr, sizeof(sc->sc_dst)) == 0 &&
> > +       sc->sc_port == port)
> > +           return (0);
> >  
> > -   /*
> > -    * The UDP checksum of VXLAN packets should be set to zero,
> > -    * but the IPv6 UDP checksum is not optional.  There is an RFC 6539
> > -    * to relax the IPv6 UDP checksum requirement for tunnels, but it
> > -    * is currently not supported by most implementations.
> > -    */
> > -   m->m_pkthdr.csum_flags |= M_UDP_CSUM_OUT;
> > +   if (ISSET(ifp->if_flags, IFF_RUNNING))
> > +           return (EBUSY);
> >  
> > -   return (m);
> > +   /* commit */
> > +   sc->sc_af = src->sa_family;
> > +   sc->sc_src = saddr;
> > +   sc->sc_dst = daddr;
> > +   sc->sc_port = port;
> > +   sc->sc_mode = mode;
> > +   etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);
> >  
> > -drop:
> > -   m_freem(m);
> > -   return (NULL);
> > +   return (0);
> >  }
> > -#endif /* INET6 */
> >  
> > -int
> > -vxlan_output(struct ifnet *ifp, struct mbuf *m)
> > +static int
> > +vxlan_get_tunnel(struct vxlan_softc *sc, struct if_laddrreq *req)
> >  {
> > -   struct vxlan_softc      *sc = (struct vxlan_softc *)ifp->if_softc;
> > -   struct vxlanudphdr      *vu;
> > -   struct sockaddr         *src, *dst;
> > -#if NBRIDGE > 0
> > -   struct bridge_tunneltag *brtag;
> > -#endif
> > -   int                      error, af;
> > -   uint32_t                 tag;
> > -   struct mbuf             *m0;
> > -
> > -   /* VXLAN header, needs new mbuf because of alignment issues */
> > -   MGET(m0, M_DONTWAIT, m->m_type);
> > -   if (m0 == NULL) {
> > -           ifp->if_oerrors++;
> > -           return (ENOBUFS);
> > -   }
> > -   M_MOVE_PKTHDR(m0, m);
> > -   m0->m_next = m;
> > -   m = m0;
> > -   m_align(m, sizeof(*vu));
> > -   m->m_len = sizeof(*vu);
> > -   m->m_pkthdr.len += sizeof(*vu);
> > -
> > -   src = sstosa(&sc->sc_src);
> > -   dst = sstosa(&sc->sc_dst);
> > -   af = src->sa_family;
> > -
> > -   vu = mtod(m, struct vxlanudphdr *);
> > -   vu->vu_u.uh_sport = sc->sc_dstport;
> > -   vu->vu_u.uh_dport = sc->sc_dstport;
> > -   vu->vu_u.uh_ulen = htons(m->m_pkthdr.len);
> > -   vu->vu_u.uh_sum = 0;
> > -   tag = sc->sc_vnetid;
> > -
> > -#if NBRIDGE > 0
> > -   if ((brtag = bridge_tunnel(m)) != NULL) {
> > -           dst = &brtag->brtag_peer.sa;
> > -
> > -           /* If accepting any VNI, source ip address is from brtag */
> > -           if (sc->sc_vnetid == VXLAN_VNI_ANY) {
> > -                   src = &brtag->brtag_local.sa;
> > -                   tag = (uint32_t)brtag->brtag_id;
> > -                   af = src->sa_family;
> > -           }
> > -
> > -           if (dst->sa_family != af) {
> > -                   ifp->if_oerrors++;
> > -                   m_freem(m);
> > -                   return (EINVAL);
> > -           }
> > -   } else
> > +   struct sockaddr *dstaddr = (struct sockaddr *)&req->dstaddr;
> > +   struct sockaddr_in *sin;
> > +#ifdef INET6
> > +   struct sockaddr_in6 *sin6;
> >  #endif
> > -   if (sc->sc_vnetid == VXLAN_VNI_ANY) {
> > -           /*
> > -            * If accepting any VNI, build the vxlan header only by
> > -            * bridge_tunneltag or drop packet if the tag does not exist.
> > -            */
> > -           ifp->if_oerrors++;
> > -           m_freem(m);
> > -           return (ENETUNREACH);
> > -   }
> >  
> > -   if (sc->sc_vnetid != VXLAN_VNI_UNSET) {
> > -           vu->vu_v.vxlan_flags = htonl(VXLAN_FLAGS_VNI);
> > -           vu->vu_v.vxlan_id = htonl(tag << VXLAN_VNI_S);
> > -   } else {
> > -           vu->vu_v.vxlan_flags = htonl(0);
> > -           vu->vu_v.vxlan_id = htonl(0);
> > -   }
> > +   if (sc->sc_af == AF_UNSPEC)
> > +           return (EADDRNOTAVAIL);
> > +   KASSERT(sc->sc_mode != VXLAN_TMODE_UNSET);
> >  
> > -   switch (af) {
> > +   memset(&req->addr, 0, sizeof(req->addr));
> > +   memset(&req->dstaddr, 0, sizeof(req->dstaddr));
> > +
> > +   /* default to endpoint */
> > +   dstaddr->sa_len = 2;
> > +   dstaddr->sa_family = AF_UNSPEC;
> > +
> > +   switch (sc->sc_af) {
> >     case AF_INET:
> > -           m = vxlan_encap4(ifp, m, src, dst);
> > +           sin = (struct sockaddr_in *)&req->addr;
> > +           sin->sin_len = sizeof(*sin);
> > +           sin->sin_family = AF_INET;
> > +           sin->sin_addr = sc->sc_src.in4;
> > +           sin->sin_port = sc->sc_port;
> > +
> > +           if (sc->sc_mode == VXLAN_TMODE_ENDPOINT)
> > +                   break;
> > +
> > +           sin = (struct sockaddr_in *)&req->dstaddr;
> > +           sin->sin_len = sizeof(*sin);
> > +           sin->sin_family = AF_INET;
> > +           sin->sin_addr = sc->sc_dst.in4;
> >             break;
> > +
> >  #ifdef INET6
> >     case AF_INET6:
> > -           m = vxlan_encap6(ifp, m, src, dst);
> > +           sin6 = (struct sockaddr_in6 *)&req->addr;
> > +           sin6->sin6_len = sizeof(*sin6);
> > +           sin6->sin6_family = AF_INET6;
> > +           in6_recoverscope(sin6, &sc->sc_src.in6);
> > +           sin6->sin6_port = sc->sc_port;
> > +
> > +           if (sc->sc_mode == VXLAN_TMODE_ENDPOINT)
> > +                   break;
> > +
> > +           sin6 = (struct sockaddr_in6 *)&req->dstaddr;
> > +           sin6->sin6_len = sizeof(*sin6);
> > +           sin6->sin6_family = AF_INET6;
> > +           in6_recoverscope(sin6, &sc->sc_dst.in6);
> >             break;
> > -#endif /* INET6 */
> > +#endif
> >     default:
> > -           m_freem(m);
> > -           m = NULL;
> > +           unhandled_af(sc->sc_af);
> >     }
> >  
> > -   if (m == NULL) {
> > -           ifp->if_oerrors++;
> > -           return (ENOBUFS);
> > +   return (0);
> > +}
> > +
> > +static int
> > +vxlan_del_tunnel(struct vxlan_softc *sc)
> > +{
> > +   struct ifnet *ifp = &sc->sc_ac.ac_if;
> > +
> > +   if (sc->sc_af == AF_UNSPEC)
> > +           return (0);
> > +
> > +   if (ISSET(ifp->if_flags, IFF_RUNNING))
> > +           return (EBUSY);
> > +
> > +   /* commit */
> > +   sc->sc_af = AF_UNSPEC;
> > +   memset(&sc->sc_src, 0, sizeof(sc->sc_src));
> > +   memset(&sc->sc_dst, 0, sizeof(sc->sc_dst));
> > +   sc->sc_port = htons(0);
> > +   sc->sc_mode = VXLAN_TMODE_UNSET;
> > +   etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);
> > +
> > +   return (0);
> > +}
> > +
> > +static int
> > +vxlan_set_vnetid(struct vxlan_softc *sc, const struct ifreq *ifr)
> > +{
> > +   struct ifnet *ifp = &sc->sc_ac.ac_if;
> > +   uint32_t vni;
> > +
> > +   if (ifr->ifr_vnetid < VXLAN_VNI_MIN ||
> > +       ifr->ifr_vnetid > VXLAN_VNI_MAX)
> > +           return (EINVAL);
> > +
> > +   vni = htonl(ifr->ifr_vnetid << VXLAN_VNI_SHIFT);
> > +   if (ISSET(sc->sc_header.vxlan_flags, htonl(VXLAN_F_I)) &&
> > +       sc->sc_header.vxlan_id == vni)
> > +           return (0);
> > +
> > +   if (ISSET(ifp->if_flags, IFF_RUNNING))
> > +           return (EBUSY);
> > +
> > +   /* commit */
> > +   SET(sc->sc_header.vxlan_flags, htonl(VXLAN_F_I));
> > +   sc->sc_header.vxlan_id = vni;
> > +   etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);
> > +
> > +   return (0);
> > +}
> > +
> > +static int
> > +vxlan_get_vnetid(struct vxlan_softc *sc, struct ifreq *ifr)
> > +{
> > +   uint32_t vni;
> > +
> > +   if (!ISSET(sc->sc_header.vxlan_flags, htonl(VXLAN_F_I)))
> > +           return (EADDRNOTAVAIL);
> > +
> > +   vni = ntohl(sc->sc_header.vxlan_id);
> > +   vni &= VXLAN_VNI_MASK;
> > +   vni >>= VXLAN_VNI_SHIFT;
> > +
> > +   ifr->ifr_vnetid = vni;
> > +
> > +   return (0);
> > +}
> > +
> > +static int
> > +vxlan_del_vnetid(struct vxlan_softc *sc)
> > +{
> > +   struct ifnet *ifp = &sc->sc_ac.ac_if;
> > +
> > +   if (!ISSET(sc->sc_header.vxlan_flags, htonl(VXLAN_F_I)))
> > +           return (0);
> > +
> > +   if (ISSET(ifp->if_flags, IFF_RUNNING))
> > +           return (EBUSY);
> > +
> > +   /* commit */
> > +   CLR(sc->sc_header.vxlan_flags, htonl(VXLAN_F_I));
> > +   sc->sc_header.vxlan_id = htonl(0 << VXLAN_VNI_SHIFT);
> > +   etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);
> > +
> > +   return (0);
> > +}
> > +
> > +static int
> > +vxlan_set_parent(struct vxlan_softc *sc, const struct if_parent *p)
> > +{
> > +   struct ifnet *ifp = &sc->sc_ac.ac_if;
> > +   struct ifnet *ifp0;
> > +   int error = 0;
> > +
> > +   ifp0 = if_unit(p->ifp_parent);
> > +   if (ifp0 == NULL)
> > +           return (ENXIO);
> > +
> > +   if (!ISSET(ifp0->if_flags, IFF_MULTICAST)) {
> > +           error = ENXIO;
> > +           goto put;
> >     }
> >  
> > -#if NBRIDGE > 0
> > -   if (brtag != NULL)
> > -           bridge_tunneluntag(m);
> > -#endif
> > +   if (sc->sc_if_index0 == ifp0->if_index)
> > +           goto put;
> >  
> > -   m->m_pkthdr.ph_rtableid = sc->sc_rdomain;
> > +   if (ISSET(ifp->if_flags, IFF_RUNNING)) {
> > +           error = EBUSY;
> > +           goto put;
> > +   }
> >  
> > -#if NPF > 0
> > -   pf_pkt_addr_changed(m);
> > +   /* commit */
> > +   sc->sc_if_index0 = ifp0->if_index;
> > +   etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);
> > +
> > +put:
> > +   if_put(ifp0);
> > +   return (error);
> > +}
> > +
> > +static int
> > +vxlan_get_parent(struct vxlan_softc *sc, struct if_parent *p)
> > +{
> > +   struct ifnet *ifp0;
> > +   int error = 0;
> > +
> > +   ifp0 = if_get(sc->sc_if_index0);
> > +   if (ifp0 == NULL)
> > +           error = EADDRNOTAVAIL;
> > +   else
> > +           strlcpy(p->ifp_parent, ifp0->if_xname, sizeof(p->ifp_parent));
> > +   if_put(ifp0);
> > +
> > +   return (error);
> > +}
> > +
> > +static int
> > +vxlan_del_parent(struct vxlan_softc *sc)
> > +{
> > +   struct ifnet *ifp = &sc->sc_ac.ac_if;
> > +
> > +   if (sc->sc_if_index0 == 0)
> > +           return (0);
> > +
> > +   if (ISSET(ifp->if_flags, IFF_RUNNING))
> > +           return (EBUSY);
> > +
> > +   /* commit */
> > +   sc->sc_if_index0 = 0;
> > +   etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);
> > +
> > +   return (0);
> > +}
> > +
> > +static int
> > +vxlan_add_addr(struct vxlan_softc *sc, const struct ifbareq *ifba)
> > +{
> > +   struct sockaddr_in *sin;
> > +#ifdef INET6
> > +   struct sockaddr_in6 *sin6;
> > +   struct sockaddr_in6 src6 = {
> > +           .sin6_len = sizeof(src6),
> > +           .sin6_family = AF_UNSPEC,
> > +   };
> > +   int error;
> >  #endif
> > +   union vxlan_addr endpoint;
> > +   unsigned int type;
> > +
> > +   switch (sc->sc_mode) {
> > +   case VXLAN_TMODE_UNSET:
> > +           return (ENOPROTOOPT);
> > +   case VXLAN_TMODE_P2P:
> > +           return (EPROTONOSUPPORT);
> > +   default:
> > +           break;
> > +   }
> > +
> > +   /* ignore ifba_ifsname */
> > +
> > +   if (ISSET(ifba->ifba_flags, ~IFBAF_TYPEMASK))
> > +           return (EINVAL);
> > +   switch (ifba->ifba_flags & IFBAF_TYPEMASK) {
> > +   case IFBAF_DYNAMIC:
> > +           type = EBE_DYNAMIC;
> > +           break;
> > +   case IFBAF_STATIC:
> > +           type = EBE_STATIC;
> > +           break;
> > +   default:
> > +           return (EINVAL);
> > +   }
> > +
> > +   memset(&endpoint, 0, sizeof(endpoint));
> >  
> > -   switch (af) {
> > +   if (ifba->ifba_dstsa.ss_family != sc->sc_af)
> > +           return (EAFNOSUPPORT);
> > +   switch (ifba->ifba_dstsa.ss_family) {
> >     case AF_INET:
> > -           error = ip_output(m, NULL, NULL, IP_RAWOUTPUT,
> > -               &sc->sc_imo, NULL, 0);
> > +           sin = (struct sockaddr_in *)&ifba->ifba_dstsa;
> > +           if (in_nullhost(sin->sin_addr) ||
> > +               IN_MULTICAST(sin->sin_addr.s_addr))
> > +                   return (EADDRNOTAVAIL);
> > +
> > +           if (sin->sin_port != htons(0))
> > +                   return (EADDRNOTAVAIL);
> > +
> > +           endpoint.in4 = sin->sin_addr;
> >             break;
> > +
> >  #ifdef INET6
> >     case AF_INET6:
> > -           error = ip6_output(m, NULL, NULL, IPV6_MINMTU, NULL, NULL);
> > +           sin6 = (struct sockaddr_in6 *)&ifba->ifba_dstsa;
> > +           if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) ||
> > +               IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
> > +                   return (EADDRNOTAVAIL);
> > +
> > +           in6_recoverscope(&src6, &sc->sc_src.in6);
> > +           if (src6.sin6_scope_id != sin6->sin6_scope_id)
> > +                   return (EADDRNOTAVAIL);
> > +
> > +           if (sin6->sin6_port != htons(0))
> > +                   return (EADDRNOTAVAIL);
> > +
> > +           error = in6_embedscope(&endpoint.in6, sin6, NULL);
> > +           if (error != 0)
> > +                   return (error);
> > +
> >             break;
> > -#endif /* INET6 */
> > -   default:
> > -           m_freem(m);
> > -           error = EAFNOSUPPORT;
> > +#endif
> > +   default: /* AF_UNSPEC */
> > +           return (EADDRNOTAVAIL);
> >     }
> >  
> > -   if (error)
> > -           ifp->if_oerrors++;
> > +   return (etherbridge_add_addr(&sc->sc_eb, &endpoint,
> > +       &ifba->ifba_dst, type));
> > +}
> >  
> > -   return (error);
> > +static int
> > +vxlan_del_addr(struct vxlan_softc *sc, const struct ifbareq *ifba)
> > +{
> > +   return (etherbridge_del_addr(&sc->sc_eb, &ifba->ifba_dst));
> >  }
> >  
> >  void
> > -vxlan_addr_change(void *arg)
> > +vxlan_detach_hook(void *arg)
> >  {
> > -   struct vxlan_softc      *sc = arg;
> > -   struct ifnet            *ifp = &sc->sc_ac.ac_if;
> > -   int                      error;
> > -
> > -   /*
> > -    * Reset the configuration after resume or any possible address
> > -    * configuration changes.
> > -    */
> > -   if ((error = vxlan_config(ifp, NULL, NULL))) {
> > -           /*
> > -            * The source address of the tunnel can temporarily disappear,
> > -            * after a link state change when running the DHCP client,
> > -            * so keep it configured.
> > -            */
> > +   struct vxlan_softc *sc = arg;
> > +   struct ifnet *ifp = &sc->sc_ac.ac_if;
> > +
> > +   if (ISSET(ifp->if_flags, IFF_RUNNING)) {
> > +           vxlan_down(sc);
> > +           CLR(ifp->if_flags, IFF_UP);
> >     }
> > +
> > +   sc->sc_if_index0 = 0;
> >  }
> >  
> > -void
> > -vxlan_if_change(void *arg)
> > +static int
> > +vxlan_eb_port_eq(void *arg, void *a, void *b)
> >  {
> > -   struct vxlan_softc      *sc = arg;
> > -   struct ifnet            *ifp = &sc->sc_ac.ac_if;
> > +   const union vxlan_addr *va = a, *vb = b;
> > +   size_t i;
> >  
> > -   /*
> > -    * Reset the configuration after the parent interface disappeared.
> > -    */
> > -   vxlan_multicast_cleanup(ifp);
> > -   memset(&sc->sc_src, 0, sizeof(sc->sc_src));
> > -   memset(&sc->sc_dst, 0, sizeof(sc->sc_dst));
> > -   sc->sc_dstport = htons(VXLAN_PORT);
> > +   for (i = 0; i < nitems(va->in6.s6_addr32); i++) {
> > +           if (va->in6.s6_addr32[i] != vb->in6.s6_addr32[i])
> > +                   return (0);
> > +   }
> > +
> > +   return (1);
> >  }
> >  
> > -void
> > -vxlan_link_change(void *arg)
> > +static void *
> > +vxlan_eb_port_take(void *arg, void *port)
> >  {
> > -   struct vxlan_softc      *sc = arg;
> > -   struct ifnet            *ifp = &sc->sc_ac.ac_if;
> > +   union vxlan_addr *endpoint;
> >  
> > -   /*
> > -    * The machine might have lost its multicast associations after
> > -    * link state changes.  This fixes a problem with VMware after
> > -    * suspend/resume of the host or guest.
> > -    */
> > -   (void)vxlan_config(ifp, NULL, NULL);
> > +   endpoint = pool_get(&vxlan_endpoint_pool, PR_NOWAIT);
> > +   if (endpoint == NULL)
> > +           return (NULL);
> > +
> > +   *endpoint = *(union vxlan_addr *)port;
> > +
> > +   return (endpoint);
> >  }
> > +
> > +static void
> > +vxlan_eb_port_rele(void *arg, void *port)
> > +{
> > +   union vxlan_addr *endpoint = port;
> > +
> > +   pool_put(&vxlan_endpoint_pool, endpoint);
> > +}
> > +
> > +static size_t
> > +vxlan_eb_port_ifname(void *arg, char *dst, size_t len, void *port)
> > +{
> > +   struct vxlan_softc *sc = arg;
> > +
> > +   return (strlcpy(dst, sc->sc_ac.ac_if.if_xname, len));
> > +}
> > +
> > +static void
> > +vxlan_eb_port_sa(void *arg, struct sockaddr_storage *ss, void *port)
> > +{
> > +   struct vxlan_softc *sc = arg;
> > +   union vxlan_addr *endpoint = port;
> > +
> > +   switch (sc->sc_af) {
> > +   case AF_INET: {
> > +           struct sockaddr_in *sin = (struct sockaddr_in *)ss;
> > +
> > +           sin->sin_len = sizeof(*sin);
> > +           sin->sin_family = AF_INET;
> > +           sin->sin_addr = endpoint->in4;
> > +           break;
> > +   }
> > +#ifdef INET6
> > +   case AF_INET6: {
> > +           struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)ss;
> > +
> > +           sin6->sin6_len = sizeof(*sin6);
> > +           sin6->sin6_family = AF_INET6;
> > +           in6_recoverscope(sin6, &endpoint->in6);
> > +           break;
> > +   }
> > +#endif /* INET6 */
> > +   default:
> > +           unhandled_af(sc->sc_af);
> > +   }
> > +}
> > +
> > +static inline int
> > +vxlan_peer_cmp(const struct vxlan_peer *ap, const struct vxlan_peer *bp)
> > +{
> > +   size_t i;
> > +
> > +   if (ap->p_header.vxlan_id > bp->p_header.vxlan_id)
> > +           return (1);
> > +   if (ap->p_header.vxlan_id < bp->p_header.vxlan_id)
> > +           return (-1);
> > +   if (ap->p_header.vxlan_flags > bp->p_header.vxlan_flags)
> > +           return (1);
> > +   if (ap->p_header.vxlan_flags < bp->p_header.vxlan_flags)
> > +           return (-1);
> > +
> > +   for (i = 0; i < nitems(ap->p_addr.in6.s6_addr32); i++) {
> > +           if (ap->p_addr.in6.s6_addr32[i] >
> > +               bp->p_addr.in6.s6_addr32[i])
> > +                   return (1);
> > +           if (ap->p_addr.in6.s6_addr32[i] <
> > +               bp->p_addr.in6.s6_addr32[i])
> > +                   return (-1);
> > +   }
> > +
> > +   return (0);
> > +}
> > +
> > +RBT_GENERATE(vxlan_peers, vxlan_peer, p_entry, vxlan_peer_cmp);
> > Index: netinet/udp_usrreq.c
> > ===================================================================
> > RCS file: /cvs/src/sys/netinet/udp_usrreq.c,v
> > retrieving revision 1.268
> > diff -u -p -r1.268 udp_usrreq.c
> > --- netinet/udp_usrreq.c    4 Jan 2022 06:32:40 -0000       1.268
> > +++ netinet/udp_usrreq.c    11 Feb 2022 05:11:13 -0000
> > @@ -112,11 +112,6 @@
> >  #include <net/pipex.h>
> >  #endif
> >  
> > -#include "vxlan.h"
> > -#if NVXLAN > 0
> > -#include <net/if_vxlan.h>
> > -#endif
> > -
> >  /*
> >   * UDP protocol implementation.
> >   * Per RFC 768, August, 1980.
> > @@ -345,15 +340,6 @@ udp_input(struct mbuf **mp, int *offp, i
> >             break;
> >  #endif /* INET6 */
> >     }
> > -
> > -#if NVXLAN > 0
> > -   if (vxlan_enable > 0 &&
> > -#if NPF > 0
> > -       !(m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) &&
> > -#endif
> > -       vxlan_lookup(m, uh, iphlen, &srcsa.sa, &dstsa.sa) != 0)
> > -           return IPPROTO_DONE;
> > -#endif
> >  
> >     if (m->m_flags & (M_BCAST|M_MCAST)) {
> >             struct inpcb *last;
> > Index: conf/files
> > ===================================================================
> > RCS file: /cvs/src/sys/conf/files,v
> > retrieving revision 1.709
> > diff -u -p -r1.709 files
> > --- conf/files      8 Feb 2022 17:25:11 -0000       1.709
> > +++ conf/files      11 Feb 2022 05:11:13 -0000
> > @@ -573,7 +573,7 @@ pseudo-device mpip: ifnet, mpls
> >  pseudo-device bpe: ifnet, ether, ifmedia, etherbridge
> >  pseudo-device vether: ifnet, ether
> >  pseudo-device pppx: ifnet
> > -pseudo-device vxlan: ifnet, ether, ifmedia
> > +pseudo-device vxlan: ifnet, ether, etherbridge
> >  pseudo-device wg: ifnet
> >  
> >  pseudo-device ksyms
> 

-- 
:wq Claudio

Reply via email to