Author: sephe
Date: Wed Aug  9 05:59:45 2017
New Revision: 322299
URL: https://svnweb.freebsd.org/changeset/base/322299

Log:
  hyperv/hn: Implement transparent mode network VF.
  
  How network VF works with hn(4) on Hyper-V in transparent mode:
  
  - Each network VF has a corresponding hn(4).
  - The network VF and its corresponding hn(4) have the same hardware
    address.
  - Once the network VF is attached, the corresponding hn(4) waits several
    seconds to make sure that the network VF attach routine completes, then:
    o  Set the corresponding hn(4)'s if_capabilities to the intersection
       of the network VF's if_capabilities and the corresponding hn(4)'s
       if_capabilities, and adjust the corresponding hn(4)'s if_capenable
       and if_hwassist accordingly. (*)
    o  Make sure that the corresponding hn(4)'s TSO parameters meet the
       constraints posed by both the network VF and the corresponding hn(4).
       (*)
    o  The network VF's if_input is overridden.  The overriding if_input
       changes the input packet's rcvif to the corresponding hn(4).  The
       network layers are tricked into thinking that all packets are
       received by the corresponding hn(4).
    o  If the corresponding hn(4) has been brought up, bring up the network
       VF.  Transmissions dispatched to the corresponding hn(4) are
       redispatched to the network VF.
    o  Bringing down the corresponding hn(4) also brings down the network
       VF.
    o  All ioctls issued to the corresponding hn(4) are passed through to
       the network VF; the corresponding hn(4) changes its internal state
       if necessary.
    o  The media status of the corresponding hn(4) relies solely on the
       network VF.
    o  If there are multicast filters on the corresponding hn(4), allmulti
       will be enabled on the network VF. (**)
  - Once the network VF is detached, undo all of the changes made to the
    corresponding hn(4) in the items above.
  
  NOTE:
  No operations should be issued directly to the network VF when network VF
  transparent mode is enabled.  Network VF transparent mode can be enabled
  by setting the tunable hw.hn.vf_transparent to 1.  Network VF transparent
  mode is _not_ enabled by default, as of this commit.
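
  For example (a minimal, hypothetical snippet, not part of this commit),
  since hw.hn.vf_transparent is a loader tunable (CTLFLAG_RDTUN), it would
  be set from /boot/loader.conf so that it takes effect at boot:

      # enable transparent mode network VF for hn(4)
      hw.hn.vf_transparent=1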
  
  The benefit of the network VF transparent mode is that the network VF
  attachment and detachment are transparent to all network layers; e.g. live
  migration detaches and reattaches the network VF.
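
  For example (a hypothetical check, assuming the usual dev.hn.<unit>
  device sysctl tree), whether transparent mode is currently active on an
  interface can be observed through the read-only sysctl added by this
  commit:

      # 1 once the VF is marked ready and enabled, 0 otherwise
      sysctl dev.hn.0.vf_xpnt_enabled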
  
  The major drawbacks of the network VF transparent mode:
  - netmap(4) support is lost, even if the VF supports it.
  - ALTQ does not work, since the if_start method cannot be properly
    supported.
  
  (*)
  These decisions were made so that things will not be messed up too much
  during the transition period.
  
  (**)
  This does _not_ need to go through the fancy multicast filter management
  machinery like vlan(4) has, at least currently:
  - As of this writing, multicast does not work in Azure.
  - As of this writing, multicast packets go through the corresponding hn(4).
  
  MFC after:    3 days
  Sponsored by: Microsoft
  Differential Revision:        https://reviews.freebsd.org/D11803

Modified:
  head/sys/dev/hyperv/netvsc/if_hn.c
  head/sys/dev/hyperv/netvsc/if_hnreg.h
  head/sys/dev/hyperv/netvsc/if_hnvar.h

Modified: head/sys/dev/hyperv/netvsc/if_hn.c
==============================================================================
--- head/sys/dev/hyperv/netvsc/if_hn.c  Wed Aug  9 05:21:57 2017        (r322298)
+++ head/sys/dev/hyperv/netvsc/if_hn.c  Wed Aug  9 05:59:45 2017        (r322299)
@@ -123,6 +123,8 @@ __FBSDID("$FreeBSD$");
 
 #define HN_VFMAP_SIZE_DEF              8
 
+#define HN_XPNT_VF_ATTWAIT_MIN         2       /* seconds */
+
 /* YYY should get it from the underlying channel */
 #define HN_TX_DESC_CNT                 512
 
@@ -263,6 +265,7 @@ static void                 hn_ifnet_event(void *, struct ifnet *, i
 static void                    hn_ifaddr_event(void *, struct ifnet *);
 static void                    hn_ifnet_attevent(void *, struct ifnet *);
 static void                    hn_ifnet_detevent(void *, struct ifnet *);
+static void                    hn_ifnet_lnkevent(void *, struct ifnet *, int);
 
 static bool                    hn_ismyvf(const struct hn_softc *,
                                    const struct ifnet *);
@@ -270,6 +273,15 @@ static void                        hn_rxvf_change(struct hn_softc *,
                                    struct ifnet *, bool);
 static void                    hn_rxvf_set(struct hn_softc *, struct ifnet *);
 static void                    hn_rxvf_set_task(void *, int);
+static void                    hn_xpnt_vf_input(struct ifnet *, struct mbuf *);
+static int                     hn_xpnt_vf_iocsetflags(struct hn_softc *);
+static int                     hn_xpnt_vf_iocsetcaps(struct hn_softc *,
+                                   struct ifreq *);
+static void                    hn_xpnt_vf_saveifflags(struct hn_softc *);
+static bool                    hn_xpnt_vf_isready(struct hn_softc *);
+static void                    hn_xpnt_vf_setready(struct hn_softc *);
+static void                    hn_xpnt_vf_init_taskfunc(void *, int);
+static void                    hn_xpnt_vf_init(struct hn_softc *);
 
 static int                     hn_rndis_rxinfo(const void *, int,
                                    struct hn_rxinfo *);
@@ -322,6 +334,8 @@ static int                  hn_vf_sysctl(SYSCTL_HANDLER_ARGS);
 static int                     hn_rxvf_sysctl(SYSCTL_HANDLER_ARGS);
 static int                     hn_vflist_sysctl(SYSCTL_HANDLER_ARGS);
 static int                     hn_vfmap_sysctl(SYSCTL_HANDLER_ARGS);
+static int                     hn_xpnt_vf_accbpf_sysctl(SYSCTL_HANDLER_ARGS);
+static int                     hn_xpnt_vf_enabled_sysctl(SYSCTL_HANDLER_ARGS);
 
 static void                    hn_stop(struct hn_softc *, bool);
 static void                    hn_init_locked(struct hn_softc *);
@@ -352,6 +366,7 @@ static void                 hn_disable_rx(struct hn_softc *);
 static void                    hn_drain_rxtx(struct hn_softc *, int);
 static void                    hn_polling(struct hn_softc *, u_int);
 static void                    hn_chan_polling(struct vmbus_channel *, u_int);
+static void                    hn_mtu_change_fixup(struct hn_softc *);
 
 static void                    hn_update_link_status(struct hn_softc *);
 static void                    hn_change_network(struct hn_softc *);
@@ -529,6 +544,22 @@ SYSCTL_PROC(_hw_hn, OID_AUTO, vflist, CTLFLAG_RD | CTL
 SYSCTL_PROC(_hw_hn, OID_AUTO, vfmap, CTLFLAG_RD | CTLTYPE_STRING,
     0, 0, hn_vfmap_sysctl, "A", "VF mapping");
 
+/* Transparent VF */
+static int                     hn_xpnt_vf = 0;
+SYSCTL_INT(_hw_hn, OID_AUTO, vf_transparent, CTLFLAG_RDTUN,
+    &hn_xpnt_vf, 0, "Transparent VF mode");
+
+/* Accurate BPF support for Transparent VF */
+static int                     hn_xpnt_vf_accbpf = 0;
+SYSCTL_INT(_hw_hn, OID_AUTO, vf_xpnt_accbpf, CTLFLAG_RDTUN,
+    &hn_xpnt_vf_accbpf, 0, "Accurate BPF for transparent VF");
+
+/* Extra wait for transparent VF attach routine; unit: seconds. */
+static int                     hn_xpnt_vf_attwait = HN_XPNT_VF_ATTWAIT_MIN;
+SYSCTL_INT(_hw_hn, OID_AUTO, vf_xpnt_attwait, CTLFLAG_RWTUN,
+    &hn_xpnt_vf_attwait, 0,
+    "Extra wait for transparent VF attach routine; unit: seconds");
+
 static u_int                   hn_cpu_index;   /* next CPU for channel */
 static struct taskqueue                **hn_tx_taskque;/* shared TX taskqueues */
 
@@ -807,8 +838,12 @@ hn_rxfilter_config(struct hn_softc *sc)
 
        HN_LOCK_ASSERT(sc);
 
-       if ((ifp->if_flags & IFF_PROMISC) ||
-           (sc->hn_flags & HN_FLAG_RXVF)) {
+       /*
+        * If the non-transparent mode VF is activated, we don't know how
+        * its RX filter is configured, so stick the synthetic device in
+        * the promiscous mode.
+        */
+       if ((ifp->if_flags & IFF_PROMISC) || (sc->hn_flags & HN_FLAG_RXVF)) {
                filter = NDIS_PACKET_TYPE_PROMISCUOUS;
        } else {
                filter = NDIS_PACKET_TYPE_DIRECTED;
@@ -1086,7 +1121,7 @@ hn_rxvf_change(struct hn_softc *sc, struct ifnet *ifp,
        }
 
        hn_nvs_set_datapath(sc,
-           rxvf ? HN_NVS_DATAPATH_VF : HN_NVS_DATAPATH_SYNTHETIC);
+           rxvf ? HN_NVS_DATAPATH_VF : HN_NVS_DATAPATH_SYNTH);
 
        hn_rxvf_set(sc, rxvf ? ifp : NULL);
 
@@ -1126,7 +1161,323 @@ hn_ifaddr_event(void *arg, struct ifnet *ifp)
        hn_rxvf_change(arg, ifp, ifp->if_flags & IFF_UP);
 }
 
+static int
+hn_xpnt_vf_iocsetcaps(struct hn_softc *sc, struct ifreq *ifr)
+{
+       struct ifnet *ifp, *vf_ifp;
+       uint64_t tmp;
+       int error;
+
+       HN_LOCK_ASSERT(sc);
+       ifp = sc->hn_ifp;
+       vf_ifp = sc->hn_vf_ifp;
+
+       /*
+        * Fix up requested capabilities w/ supported capabilities,
+        * since the supported capabilities could have been changed.
+        */
+       ifr->ifr_reqcap &= ifp->if_capabilities;
+       /* Pass SIOCSIFCAP to VF. */
+       error = vf_ifp->if_ioctl(vf_ifp, SIOCSIFCAP, (caddr_t)ifr);
+
+       /*
+        * NOTE:
+        * The error will be propagated to the callers, however, it
+        * is _not_ useful here.
+        */
+
+       /*
+        * Merge VF's enabled capabilities.
+        */
+       ifp->if_capenable = vf_ifp->if_capenable & ifp->if_capabilities;
+
+       tmp = vf_ifp->if_hwassist & HN_CSUM_IP_HWASSIST(sc);
+       if (ifp->if_capenable & IFCAP_TXCSUM)
+               ifp->if_hwassist |= tmp;
+       else
+               ifp->if_hwassist &= ~tmp;
+
+       tmp = vf_ifp->if_hwassist & HN_CSUM_IP6_HWASSIST(sc);
+       if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
+               ifp->if_hwassist |= tmp;
+       else
+               ifp->if_hwassist &= ~tmp;
+
+       tmp = vf_ifp->if_hwassist & CSUM_IP_TSO;
+       if (ifp->if_capenable & IFCAP_TSO4)
+               ifp->if_hwassist |= tmp;
+       else
+               ifp->if_hwassist &= ~tmp;
+
+       tmp = vf_ifp->if_hwassist & CSUM_IP6_TSO;
+       if (ifp->if_capenable & IFCAP_TSO6)
+               ifp->if_hwassist |= tmp;
+       else
+               ifp->if_hwassist &= ~tmp;
+
+       return (error);
+}
+
+static int
+hn_xpnt_vf_iocsetflags(struct hn_softc *sc)
+{
+       struct ifnet *vf_ifp;
+       struct ifreq ifr;
+
+       HN_LOCK_ASSERT(sc);
+       vf_ifp = sc->hn_vf_ifp;
+
+       memset(&ifr, 0, sizeof(ifr));
+       strlcpy(ifr.ifr_name, vf_ifp->if_xname, sizeof(ifr.ifr_name));
+       ifr.ifr_flags = vf_ifp->if_flags & 0xffff;
+       ifr.ifr_flagshigh = vf_ifp->if_flags >> 16;
+       return (vf_ifp->if_ioctl(vf_ifp, SIOCSIFFLAGS, (caddr_t)&ifr));
+}
+
 static void
+hn_xpnt_vf_saveifflags(struct hn_softc *sc)
+{
+       struct ifnet *ifp = sc->hn_ifp;
+       int allmulti = 0;
+
+       HN_LOCK_ASSERT(sc);
+
+       /* XXX vlan(4) style mcast addr maintenance */
+       if (!TAILQ_EMPTY(&ifp->if_multiaddrs))
+               allmulti = IFF_ALLMULTI;
+
+       /* Always set the VF's if_flags */
+       sc->hn_vf_ifp->if_flags = ifp->if_flags | allmulti;
+}
+
+static void
+hn_xpnt_vf_input(struct ifnet *vf_ifp, struct mbuf *m)
+{
+       struct rm_priotracker pt;
+       struct ifnet *hn_ifp = NULL;
+       struct mbuf *mn;
+
+       /*
+        * XXX racy, if hn(4) ever detached.
+        */
+       rm_rlock(&hn_vfmap_lock, &pt);
+       if (vf_ifp->if_index < hn_vfmap_size)
+               hn_ifp = hn_vfmap[vf_ifp->if_index];
+       rm_runlock(&hn_vfmap_lock, &pt);
+
+       if (hn_ifp != NULL) {
+               /*
+                * Fix up rcvif and go through hn(4)'s if_input and 
+                * increase ipackets.
+                */
+               for (mn = m; mn != NULL; mn = mn->m_nextpkt) {
+                       /* Allow tapping on the VF. */
+                       ETHER_BPF_MTAP(vf_ifp, mn);
+                       mn->m_pkthdr.rcvif = hn_ifp;
+                       if_inc_counter(hn_ifp, IFCOUNTER_IPACKETS, 1);
+               }
+               hn_ifp->if_input(hn_ifp, m);
+       } else {
+               /*
+                * In the middle of the transition; free this
+                * mbuf chain.
+                */
+               while (m != NULL) {
+                       mn = m->m_nextpkt;
+                       m->m_nextpkt = NULL;
+                       m_freem(m);
+                       m = mn;
+               }
+       }
+}
+
+static void
+hn_mtu_change_fixup(struct hn_softc *sc)
+{
+       struct ifnet *ifp;
+
+       HN_LOCK_ASSERT(sc);
+       ifp = sc->hn_ifp;
+
+       hn_set_tso_maxsize(sc, hn_tso_maxlen, ifp->if_mtu);
+#if __FreeBSD_version >= 1100099
+       if (sc->hn_rx_ring[0].hn_lro.lro_length_lim < HN_LRO_LENLIM_MIN(ifp))
+               hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MIN(ifp));
+#endif
+}
+
+static void
+hn_xpnt_vf_setready(struct hn_softc *sc)
+{
+       struct ifnet *ifp, *vf_ifp;
+       struct ifreq ifr;
+
+       HN_LOCK_ASSERT(sc);
+       ifp = sc->hn_ifp;
+       vf_ifp = sc->hn_vf_ifp;
+
+       /*
+        * Mark the VF ready.
+        */
+       sc->hn_vf_rdytick = 0;
+
+       /*
+        * Save information for restoration.
+        */
+       sc->hn_saved_caps = ifp->if_capabilities;
+       sc->hn_saved_tsomax = ifp->if_hw_tsomax;
+       sc->hn_saved_tsosegcnt = ifp->if_hw_tsomaxsegcount;
+       sc->hn_saved_tsosegsz = ifp->if_hw_tsomaxsegsize;
+
+       /*
+        * Intersect supported/enabled capabilities.
+        *
+        * NOTE:
+        * if_hwassist is not changed here.
+        */
+       ifp->if_capabilities &= vf_ifp->if_capabilities;
+       ifp->if_capenable &= ifp->if_capabilities;
+
+       /*
+        * Fix TSO settings.
+        */
+       if (ifp->if_hw_tsomax > vf_ifp->if_hw_tsomax)
+               ifp->if_hw_tsomax = vf_ifp->if_hw_tsomax;
+       if (ifp->if_hw_tsomaxsegcount > vf_ifp->if_hw_tsomaxsegcount)
+               ifp->if_hw_tsomaxsegcount = vf_ifp->if_hw_tsomaxsegcount;
+       if (ifp->if_hw_tsomaxsegsize > vf_ifp->if_hw_tsomaxsegsize)
+               ifp->if_hw_tsomaxsegsize = vf_ifp->if_hw_tsomaxsegsize;
+
+       /*
+        * Change VF's enabled capabilities.
+        */
+       memset(&ifr, 0, sizeof(ifr));
+       strlcpy(ifr.ifr_name, vf_ifp->if_xname, sizeof(ifr.ifr_name));
+       ifr.ifr_reqcap = ifp->if_capenable;
+       hn_xpnt_vf_iocsetcaps(sc, &ifr);
+
+       if (ifp->if_mtu != ETHERMTU) {
+               int error;
+
+               /*
+                * Change VF's MTU.
+                */
+               memset(&ifr, 0, sizeof(ifr));
+               strlcpy(ifr.ifr_name, vf_ifp->if_xname, sizeof(ifr.ifr_name));
+               ifr.ifr_mtu = ifp->if_mtu;
+               error = vf_ifp->if_ioctl(vf_ifp, SIOCSIFMTU, (caddr_t)&ifr);
+               if (error) {
+                       if_printf(ifp, "%s SIOCSIFMTU %u failed\n",
+                           vf_ifp->if_xname, ifp->if_mtu);
+                       if (ifp->if_mtu > ETHERMTU) {
+                               if_printf(ifp, "change MTU to %d\n", ETHERMTU);
+
+                               /*
+                                * XXX
+                                * No need to adjust the synthetic parts' MTU;
+                                * failure of the adjustment will cause us
+                                * infinite headache.
+                                */
+                               ifp->if_mtu = ETHERMTU;
+                               hn_mtu_change_fixup(sc);
+                       }
+               }
+       }
+}
+
+static bool
+hn_xpnt_vf_isready(struct hn_softc *sc)
+{
+
+       HN_LOCK_ASSERT(sc);
+
+       if (!hn_xpnt_vf || sc->hn_vf_ifp == NULL)
+               return (false);
+
+       if (sc->hn_vf_rdytick == 0)
+               return (true);
+
+       if (sc->hn_vf_rdytick > ticks)
+               return (false);
+
+       /* Mark VF as ready. */
+       hn_xpnt_vf_setready(sc);
+       return (true);
+}
+
+static void
+hn_xpnt_vf_init(struct hn_softc *sc)
+{
+       int error;
+
+       HN_LOCK_ASSERT(sc);
+
+       KASSERT((sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) == 0,
+           ("%s: transparent VF was enabled", sc->hn_ifp->if_xname));
+
+       if (bootverbose) {
+               if_printf(sc->hn_ifp, "try bringing up %s\n",
+                   sc->hn_vf_ifp->if_xname);
+       }
+
+       /*
+        * Bring the VF up.
+        */
+       hn_xpnt_vf_saveifflags(sc);
+       sc->hn_vf_ifp->if_flags |= IFF_UP;
+       error = hn_xpnt_vf_iocsetflags(sc);
+       if (error) {
+               if_printf(sc->hn_ifp, "bringing up %s failed: %d\n",
+                   sc->hn_vf_ifp->if_xname, error);
+               return;
+       }
+
+       /*
+        * NOTE:
+        * Datapath setting must happen _after_ bringing the VF up.
+        */
+       hn_nvs_set_datapath(sc, HN_NVS_DATAPATH_VF);
+
+       /* NOTE: hn_vf_lock for hn_transmit()/hn_qflush() */
+       rm_wlock(&sc->hn_vf_lock);
+       sc->hn_xvf_flags |= HN_XVFFLAG_ENABLED;
+       rm_wunlock(&sc->hn_vf_lock);
+}
+
+static void
+hn_xpnt_vf_init_taskfunc(void *xsc, int pending __unused)
+{
+       struct hn_softc *sc = xsc;
+
+       HN_LOCK(sc);
+
+       if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0)
+               goto done;
+       if (sc->hn_vf_ifp == NULL)
+               goto done;
+       if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)
+               goto done;
+
+       if (sc->hn_vf_rdytick != 0) {
+               /* Mark VF as ready. */
+               hn_xpnt_vf_setready(sc);
+       }
+
+       if (sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) {
+               /*
+                * Delayed VF initialization.
+                */
+               if (bootverbose) {
+                       if_printf(sc->hn_ifp, "delayed initialize %s\n",
+                           sc->hn_vf_ifp->if_xname);
+               }
+               hn_xpnt_vf_init(sc);
+       }
+done:
+       HN_UNLOCK(sc);
+}
+
+static void
 hn_ifnet_attevent(void *xsc, struct ifnet *ifp)
 {
        struct hn_softc *sc = xsc;
@@ -1145,6 +1496,16 @@ hn_ifnet_attevent(void *xsc, struct ifnet *ifp)
                goto done;
        }
 
+       if (hn_xpnt_vf && ifp->if_start != NULL) {
+               /*
+                * ifnet.if_start is _not_ supported by transparent
+                * mode VF; mainly due to the IFF_DRV_OACTIVE flag.
+                */
+               if_printf(sc->hn_ifp, "%s uses if_start, which is unsupported "
+                   "in transparent VF mode.\n", ifp->if_xname);
+               goto done;
+       }
+
        rm_wlock(&hn_vfmap_lock);
 
        if (ifp->if_index >= hn_vfmap_size) {
@@ -1168,7 +1529,37 @@ hn_ifnet_attevent(void *xsc, struct ifnet *ifp)
 
        rm_wunlock(&hn_vfmap_lock);
 
+       /* NOTE: hn_vf_lock for hn_transmit()/hn_qflush() */
+       rm_wlock(&sc->hn_vf_lock);
+       KASSERT((sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) == 0,
+           ("%s: transparent VF was enabled", sc->hn_ifp->if_xname));
        sc->hn_vf_ifp = ifp;
+       rm_wunlock(&sc->hn_vf_lock);
+
+       if (hn_xpnt_vf) {
+               int wait_ticks;
+
+               /*
+                * Install if_input for vf_ifp, which does vf_ifp -> hn_ifp.
+                * Save vf_ifp's current if_input for later restoration.
+                */
+               sc->hn_vf_input = ifp->if_input;
+               ifp->if_input = hn_xpnt_vf_input;
+
+               /*
+                * Stop link status management; use the VF's.
+                */
+               hn_suspend_mgmt(sc);
+
+               /*
+                * Give the VF some time to complete its attach routine.
+                */
+               wait_ticks = hn_xpnt_vf_attwait * hz;
+               sc->hn_vf_rdytick = ticks + wait_ticks;
+
+               taskqueue_enqueue_timeout(sc->hn_vf_taskq, &sc->hn_vf_init,
+                   wait_ticks);
+       }
 done:
        HN_UNLOCK(sc);
 }
@@ -1186,7 +1577,61 @@ hn_ifnet_detevent(void *xsc, struct ifnet *ifp)
        if (!hn_ismyvf(sc, ifp))
                goto done;
 
+       if (hn_xpnt_vf) {
+               /*
+                * Make sure that the delayed initialization is not running.
+                *
+                * NOTE:
+                * - This lock _must_ be released, since the hn_vf_init task
+                *   will try holding this lock.
+                * - It is safe to release this lock here, since the
+                *   hn_ifnet_attevent() is interlocked by the hn_vf_ifp.
+                *
+                * XXX racy, if hn(4) ever detached.
+                */
+               HN_UNLOCK(sc);
+               taskqueue_drain_timeout(sc->hn_vf_taskq, &sc->hn_vf_init);
+               HN_LOCK(sc);
+
+               KASSERT(sc->hn_vf_input != NULL, ("%s VF input is not saved",
+                   sc->hn_ifp->if_xname));
+               ifp->if_input = sc->hn_vf_input;
+               sc->hn_vf_input = NULL;
+
+               if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)
+                       hn_nvs_set_datapath(sc, HN_NVS_DATAPATH_SYNTH);
+
+               if (sc->hn_vf_rdytick == 0) {
+                       /*
+                        * The VF was ready; restore some settings.
+                        */
+                       sc->hn_ifp->if_capabilities = sc->hn_saved_caps;
+                       /*
+                        * NOTE:
+                        * There is _no_ need to fixup if_capenable and
+                        * if_hwassist, since the if_capabilities before
+                        * restoration was an intersection of the VF's
+                        * if_capabilities and the synthetic device's
+                        * if_capabilities.
+                        */
+                       sc->hn_ifp->if_hw_tsomax = sc->hn_saved_tsomax;
+                       sc->hn_ifp->if_hw_tsomaxsegcount =
+                           sc->hn_saved_tsosegcnt;
+                       sc->hn_ifp->if_hw_tsomaxsegsize = sc->hn_saved_tsosegsz;
+               }
+
+               /*
+                * Resume link status management, which was suspended
+                * by hn_ifnet_attevent().
+                */
+               hn_resume_mgmt(sc);
+       }
+
+       /* NOTE: hn_vf_lock for hn_transmit()/hn_qflush() */
+       rm_wlock(&sc->hn_vf_lock);
+       sc->hn_xvf_flags &= ~HN_XVFFLAG_ENABLED;
        sc->hn_vf_ifp = NULL;
+       rm_wunlock(&sc->hn_vf_lock);
 
        rm_wlock(&hn_vfmap_lock);
 
@@ -1205,6 +1650,15 @@ done:
        HN_UNLOCK(sc);
 }
 
+static void
+hn_ifnet_lnkevent(void *xsc, struct ifnet *ifp, int link_state)
+{
+       struct hn_softc *sc = xsc;
+
+       if (sc->hn_vf_ifp == ifp)
+               if_link_state_change(sc->hn_ifp, link_state);
+}
+
 /* {F8615163-DF3E-46c5-913F-F2D2F965ED0E} */
 static const struct hyperv_guid g_net_vsc_device_type = {
        .hv_guid = {0x63, 0x51, 0x61, 0xF8, 0x3E, 0xDF, 0xc5, 0x46,
@@ -1236,6 +1690,9 @@ hn_attach(device_t dev)
        sc->hn_dev = dev;
        sc->hn_prichan = vmbus_get_channel(dev);
        HN_LOCK_INIT(sc);
+       rm_init(&sc->hn_vf_lock, "hnvf");
+       if (hn_xpnt_vf && hn_xpnt_vf_accbpf)
+               sc->hn_xvf_flags |= HN_XVFFLAG_ACCBPF;
 
        /*
         * Initialize these tunables once.
@@ -1275,6 +1732,18 @@ hn_attach(device_t dev)
        TIMEOUT_TASK_INIT(sc->hn_mgmt_taskq0, &sc->hn_netchg_status, 0,
            hn_netchg_status_taskfunc, sc);
 
+       if (hn_xpnt_vf) {
+               /*
+                * Setup taskqueue for VF tasks, e.g. delayed VF bringing up.
+                */
+               sc->hn_vf_taskq = taskqueue_create("hn_vf", M_WAITOK,
+                   taskqueue_thread_enqueue, &sc->hn_vf_taskq);
+               taskqueue_start_threads(&sc->hn_vf_taskq, 1, PI_NET, "%s vf",
+                   device_get_nameunit(dev));
+               TIMEOUT_TASK_INIT(sc->hn_vf_taskq, &sc->hn_vf_init, 0,
+                   hn_xpnt_vf_init_taskfunc, sc);
+       }
+
        /*
         * Allocate ifnet and setup its name earlier, so that if_printf
         * can be used by functions, which will be called after
@@ -1401,6 +1870,14 @@ hn_attach(device_t dev)
        SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "hwassist",
            CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
            hn_hwassist_sysctl, "A", "hwassist");
+       SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tso_max",
+           CTLFLAG_RD, &ifp->if_hw_tsomax, 0, "max TSO size");
+       SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tso_maxsegcnt",
+           CTLFLAG_RD, &ifp->if_hw_tsomaxsegcount, 0,
+           "max # of TSO segments");
+       SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tso_maxsegsz",
+           CTLFLAG_RD, &ifp->if_hw_tsomaxsegsize, 0,
+           "max size of TSO segment");
        SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rxfilter",
            CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
            hn_rxfilter_sysctl, "A", "rxfilter");
@@ -1445,9 +1922,20 @@ hn_attach(device_t dev)
        SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "vf",
            CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
            hn_vf_sysctl, "A", "Virtual Function's name");
-       SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rxvf",
-           CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
-           hn_rxvf_sysctl, "A", "activated Virtual Function's name");
+       if (!hn_xpnt_vf) {
+               SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rxvf",
+                   CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
+                   hn_rxvf_sysctl, "A", "activated Virtual Function's name");
+       } else {
+               SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "vf_xpnt_enabled",
+                   CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
+                   hn_xpnt_vf_enabled_sysctl, "I",
+                   "Transparent VF enabled");
+               SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "vf_xpnt_accbpf",
+                   CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
+                   hn_xpnt_vf_accbpf_sysctl, "I",
+                   "Accurate BPF for transparent VF");
+       }
 
        /*
         * Setup the ifmedia, which has been initialized earlier.
@@ -1480,7 +1968,7 @@ hn_attach(device_t dev)
                ifp->if_qflush = hn_xmit_qflush;
        }
 
-       ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_LRO;
+       ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_LRO | IFCAP_LINKSTATE;
 #ifdef foo
        /* We can't diff IPv6 packets from IPv4 packets on RX path. */
        ifp->if_capabilities |= IFCAP_RXCSUM_IPV6;
@@ -1515,7 +2003,13 @@ hn_attach(device_t dev)
        ifp->if_hwassist &= ~(HN_CSUM_IP6_MASK | CSUM_IP6_TSO);
 
        if (ifp->if_capabilities & (IFCAP_TSO6 | IFCAP_TSO4)) {
+               /*
+                * Lock hn_set_tso_maxsize() to simplify its
+                * internal logic.
+                */
+               HN_LOCK(sc);
                hn_set_tso_maxsize(sc, hn_tso_maxlen, ETHERMTU);
+               HN_UNLOCK(sc);
                ifp->if_hw_tsomaxsegcount = HN_TX_DATA_SEGCNT_MAX;
                ifp->if_hw_tsomaxsegsize = PAGE_SIZE;
        }
@@ -1536,10 +2030,15 @@ hn_attach(device_t dev)
        sc->hn_mgmt_taskq = sc->hn_mgmt_taskq0;
        hn_update_link_status(sc);
 
-       sc->hn_ifnet_evthand = EVENTHANDLER_REGISTER(ifnet_event,
-           hn_ifnet_event, sc, EVENTHANDLER_PRI_ANY);
-       sc->hn_ifaddr_evthand = EVENTHANDLER_REGISTER(ifaddr_event,
-           hn_ifaddr_event, sc, EVENTHANDLER_PRI_ANY);
+       if (!hn_xpnt_vf) {
+               sc->hn_ifnet_evthand = EVENTHANDLER_REGISTER(ifnet_event,
+                   hn_ifnet_event, sc, EVENTHANDLER_PRI_ANY);
+               sc->hn_ifaddr_evthand = EVENTHANDLER_REGISTER(ifaddr_event,
+                   hn_ifaddr_event, sc, EVENTHANDLER_PRI_ANY);
+       } else {
+               sc->hn_ifnet_lnkhand = EVENTHANDLER_REGISTER(ifnet_link_event,
+                   hn_ifnet_lnkevent, sc, EVENTHANDLER_PRI_ANY);
+       }
 
        /*
         * NOTE:
@@ -1566,6 +2065,14 @@ hn_detach(device_t dev)
        struct hn_softc *sc = device_get_softc(dev);
        struct ifnet *ifp = sc->hn_ifp, *vf_ifp;
 
+       if (sc->hn_xact != NULL && vmbus_chan_is_revoked(sc->hn_prichan)) {
+               /*
+                * In case that the vmbus missed the orphan handler
+                * installation.
+                */
+               vmbus_xact_ctx_orphan(sc->hn_xact);
+       }
+
        if (sc->hn_ifaddr_evthand != NULL)
                EVENTHANDLER_DEREGISTER(ifaddr_event, sc->hn_ifaddr_evthand);
        if (sc->hn_ifnet_evthand != NULL)
@@ -1578,20 +2085,14 @@ hn_detach(device_t dev)
                EVENTHANDLER_DEREGISTER(ifnet_departure_event,
                    sc->hn_ifnet_dethand);
        }
+       if (sc->hn_ifnet_lnkhand != NULL)
+               EVENTHANDLER_DEREGISTER(ifnet_link_event, sc->hn_ifnet_lnkhand);
 
        vf_ifp = sc->hn_vf_ifp;
        __compiler_membar();
        if (vf_ifp != NULL)
                hn_ifnet_detevent(sc, vf_ifp);
 
-       if (sc->hn_xact != NULL && vmbus_chan_is_revoked(sc->hn_prichan)) {
-               /*
-                * In case that the vmbus missed the orphan handler
-                * installation.
-                */
-               vmbus_xact_ctx_orphan(sc->hn_xact);
-       }
-
        if (device_is_attached(dev)) {
                HN_LOCK(sc);
                if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) {
@@ -1621,6 +2122,8 @@ hn_detach(device_t dev)
                free(sc->hn_tx_taskqs, M_DEVBUF);
        }
        taskqueue_free(sc->hn_mgmt_taskq0);
+       if (sc->hn_vf_taskq != NULL)
+               taskqueue_free(sc->hn_vf_taskq);
 
        if (sc->hn_xact != NULL) {
                /*
@@ -1634,6 +2137,7 @@ hn_detach(device_t dev)
        if_free(ifp);
 
        HN_LOCK_DESTROY(sc);
+       rm_destroy(&sc->hn_vf_lock);
        return (0);
 }
 
@@ -2699,7 +3203,8 @@ static int
 hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 {
        struct hn_softc *sc = ifp->if_softc;
-       struct ifreq *ifr = (struct ifreq *)data;
+       struct ifreq *ifr = (struct ifreq *)data, ifr_vf;
+       struct ifnet *vf_ifp;
        int mask, error = 0;
 
        switch (cmd) {
@@ -2728,6 +3233,21 @@ hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
                        break;
                }
 
+               if (hn_xpnt_vf_isready(sc)) {
+                       vf_ifp = sc->hn_vf_ifp;
+                       ifr_vf = *ifr;
+                       strlcpy(ifr_vf.ifr_name, vf_ifp->if_xname,
+                           sizeof(ifr_vf.ifr_name));
+                       error = vf_ifp->if_ioctl(vf_ifp, SIOCSIFMTU,
+                           (caddr_t)&ifr_vf);
+                       if (error) {
+                               HN_UNLOCK(sc);
+                               if_printf(ifp, "%s SIOCSIFMTU %d failed: %d\n",
+                                   vf_ifp->if_xname, ifr->ifr_mtu, error);
+                               break;
+                       }
+               }
+
                /*
                 * Suspend this interface before the synthetic parts
                 * are ripped.
@@ -2756,23 +3276,32 @@ hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
                ifp->if_mtu = ifr->ifr_mtu;
 
                /*
-                * Make sure that various parameters based on MTU are
-                * still valid, after the MTU change.
+                * Synthetic parts' reattach may change the chimney
+                * sending size; update it.
                 */
                if (sc->hn_tx_ring[0].hn_chim_size > sc->hn_chim_szmax)
                        hn_set_chim_size(sc, sc->hn_chim_szmax);
-               hn_set_tso_maxsize(sc, hn_tso_maxlen, ifp->if_mtu);
-#if __FreeBSD_version >= 1100099
-               if (sc->hn_rx_ring[0].hn_lro.lro_length_lim <
-                   HN_LRO_LENLIM_MIN(ifp))
-                       hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MIN(ifp));
-#endif
 
                /*
+                * Make sure that various parameters based on MTU are
+                * still valid, after the MTU change.
+                */
+               hn_mtu_change_fixup(sc);
+
+               /*
                 * All done!  Resume the interface now.
                 */
                hn_resume(sc);
 
+               if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) {
+                       /*
+                        * Since we have reattached the NVS part,
+                        * change the datapath to VF again, in case
+                        * it was lost after the NVS was detached.
+                        */
+                       hn_nvs_set_datapath(sc, HN_NVS_DATAPATH_VF);
+               }
+
                HN_UNLOCK(sc);
                break;
 
@@ -2784,6 +3313,9 @@ hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
                        break;
                }
 
+               if (hn_xpnt_vf_isready(sc))
+                       hn_xpnt_vf_saveifflags(sc);
+
                if (ifp->if_flags & IFF_UP) {
                        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                                /*
@@ -2794,6 +3326,9 @@ hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
                                HN_NO_SLEEPING(sc);
                                hn_rxfilter_config(sc);
                                HN_SLEEPING_OK(sc);
+
+                               if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)
+                                       error = hn_xpnt_vf_iocsetflags(sc);
                        } else {
                                hn_init_locked(sc);
                        }
@@ -2808,8 +3343,23 @@ hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 
        case SIOCSIFCAP:
                HN_LOCK(sc);
-               mask = ifr->ifr_reqcap ^ ifp->if_capenable;
 
+               if (hn_xpnt_vf_isready(sc)) {
+                       ifr_vf = *ifr;
+                       strlcpy(ifr_vf.ifr_name, sc->hn_vf_ifp->if_xname,
+                           sizeof(ifr_vf.ifr_name));
+                       error = hn_xpnt_vf_iocsetcaps(sc, &ifr_vf);
+                       HN_UNLOCK(sc);
+                       break;
+               }
+
+               /*
+                * Fix up requested capabilities w/ supported capabilities,
+                * since the supported capabilities could have been changed.
+                */
+               mask = (ifr->ifr_reqcap & ifp->if_capabilities) ^
+                   ifp->if_capenable;
+
                if (mask & IFCAP_TXCSUM) {
                        ifp->if_capenable ^= IFCAP_TXCSUM;
                        if (ifp->if_capenable & IFCAP_TXCSUM)
@@ -2873,11 +3423,42 @@ hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
                        HN_SLEEPING_OK(sc);
                }
 
+               /* XXX vlan(4) style mcast addr maintenance */
+               if (hn_xpnt_vf_isready(sc)) {
+                       int old_if_flags;
+
+                       old_if_flags = sc->hn_vf_ifp->if_flags;
+                       hn_xpnt_vf_saveifflags(sc);
+
+                       if ((sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) &&
+                           ((old_if_flags ^ sc->hn_vf_ifp->if_flags) &
+                            IFF_ALLMULTI))
+                               error = hn_xpnt_vf_iocsetflags(sc);
+               }
+
                HN_UNLOCK(sc);
                break;
 
        case SIOCSIFMEDIA:
        case SIOCGIFMEDIA:
+               HN_LOCK(sc);
+               if (hn_xpnt_vf_isready(sc)) {
+                       /*
+                        * SIOCGIFMEDIA expects ifmediareq, so don't
+                        * create and pass ifr_vf to the VF here; just
+                        * replace the ifr_name.
+                        */
+                       vf_ifp = sc->hn_vf_ifp;
+                       strlcpy(ifr->ifr_name, vf_ifp->if_xname,
+                           sizeof(ifr->ifr_name));
+                       error = vf_ifp->if_ioctl(vf_ifp, cmd, data);
+                       /* Restore the ifr_name. */
+                       strlcpy(ifr->ifr_name, ifp->if_xname,
+                           sizeof(ifr->ifr_name));
+                       HN_UNLOCK(sc);
+                       break;
+               }
+               HN_UNLOCK(sc);
                error = ifmedia_ioctl(ifp, ifr, &sc->hn_media, cmd);
                break;
 
@@ -2899,11 +3480,37 @@ hn_stop(struct hn_softc *sc, bool detaching)
        KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED,
            ("synthetic parts were not attached"));
 
+       /* Clear RUNNING bit ASAP. */
+       atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_RUNNING);
+
        /* Disable polling. */
        hn_polling(sc, 0);
 
-       /* Clear RUNNING bit _before_ hn_suspend_data() */
-       atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_RUNNING);
+       if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) {
+               KASSERT(sc->hn_vf_ifp != NULL,
+                   ("%s: VF is not attached", ifp->if_xname));
+
+               /* NOTE: hn_vf_lock for hn_transmit() */
+               rm_wlock(&sc->hn_vf_lock);
+               sc->hn_xvf_flags &= ~HN_XVFFLAG_ENABLED;
+               rm_wunlock(&sc->hn_vf_lock);
+
+               /*
+                * NOTE:
+                * Datapath setting must happen _before_ bringing
+                * the VF down.
+                */
+               hn_nvs_set_datapath(sc, HN_NVS_DATAPATH_SYNTH);
+
+               /*
+                * Bring the VF down.
+                */
+               hn_xpnt_vf_saveifflags(sc);
+               sc->hn_vf_ifp->if_flags &= ~IFF_UP;
+               hn_xpnt_vf_iocsetflags(sc);
+       }
+
+       /* Suspend data transfers. */
        hn_suspend_data(sc);
 
        /* Clear OACTIVE bit. */
@@ -2912,8 +3519,8 @@ hn_stop(struct hn_softc *sc, bool detaching)
                sc->hn_tx_ring[i].hn_oactive = 0;
 
        /*
-        * If the VF is active, make sure the filter is not 0, even if
-        * the synthetic NIC is down.
+        * If the non-transparent mode VF is active, make sure
+        * that the RX filter still allows packet reception.
         */
        if (!detaching && (sc->hn_flags & HN_FLAG_RXVF))
                hn_rxfilter_config(sc);
@@ -2944,6 +3551,11 @@ hn_init_locked(struct hn_softc *sc)
        /* Clear TX 'suspended' bit. */
        hn_resume_tx(sc, sc->hn_tx_ring_inuse);
 
+       if (hn_xpnt_vf_isready(sc)) {
+               /* Initialize transparent VF. */
+               hn_xpnt_vf_init(sc);
+       }
+
        /* Everything is ready; unleash! */
        atomic_set_int(&ifp->if_drv_flags, IFF_DRV_RUNNING);
 
@@ -3567,6 +4179,42 @@ hn_vfmap_sysctl(SYSCTL_HANDLER_ARGS)
 }
 
 static int
+hn_xpnt_vf_accbpf_sysctl(SYSCTL_HANDLER_ARGS)
+{
+       struct hn_softc *sc = arg1;
+       int error, onoff = 0;
+
+       if (sc->hn_xvf_flags & HN_XVFFLAG_ACCBPF)
+               onoff = 1;
+       error = sysctl_handle_int(oidp, &onoff, 0, req);
+       if (error || req->newptr == NULL)
+               return (error);
+
+       HN_LOCK(sc);
+       /* NOTE: hn_vf_lock for hn_transmit() */
+       rm_wlock(&sc->hn_vf_lock);
+       if (onoff)
+               sc->hn_xvf_flags |= HN_XVFFLAG_ACCBPF;
+       else
+               sc->hn_xvf_flags &= ~HN_XVFFLAG_ACCBPF;
+       rm_wunlock(&sc->hn_vf_lock);
+       HN_UNLOCK(sc);
+
+       return (0);
+}
+
+static int
+hn_xpnt_vf_enabled_sysctl(SYSCTL_HANDLER_ARGS)
+{
+       struct hn_softc *sc = arg1;
+       int enabled = 0;
+
+       if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)
+               enabled = 1;
+       return (sysctl_handle_int(oidp, &enabled, 0, req));
+}
+
+static int
 hn_check_iplen(const struct mbuf *m, int hoff)
 {
        const struct ip *ip;
@@ -4282,8 +4930,11 @@ static void
 hn_set_tso_maxsize(struct hn_softc *sc, int tso_maxlen, int mtu)
 {
        struct ifnet *ifp = sc->hn_ifp;
+       u_int hw_tsomax;
        int tso_minlen;
 
+       HN_LOCK_ASSERT(sc);
+
        if ((ifp->if_capabilities & (IFCAP_TSO4 | IFCAP_TSO6)) == 0)
                return;

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***