Author: sephe
Date: Mon Jun 13 07:03:00 2016
New Revision: 301860
URL: https://svnweb.freebsd.org/changeset/base/301860

Log:
  MFC 295740,295741,295742
  
  295740
      hyperv/hn: Set the TCP ACK/data segment aggregation limit
  
      Set TCP ACK append limit to 1, i.e. aggregate 2 ACKs at most.  Aggregating
      anything more than 2 hurts TCP sending performance in hyperv.  This
      significantly improves the TCP sending performance when the number of
      concurrent connections is low (2~8).  And it greatly stabilizes the TCP
      sending performance in other cases.
  
      Set TCP data segments aggregation length limit to 37500.  Without this
      limitation, hn(4) could aggregate ~45 TCP data segments for each
      connection (even at 64 or more connections) before dispatching them to
      socket code; large aggregation slows down ACK sending and eventually
      hurts/destabilizes TCP reception performance.  This setting stabilizes
      and improves TCP reception performance for >4 concurrent connections
      significantly.
  
      Make them sysctls so they could be adjusted.
  
      Reviewed by:        adrian, gallatin (previous version), hselasky (previous version)
      Approved by:        adrian (mentor)
      MFC after:  1 week
      Sponsored by:       Microsoft OSTC
      Differential Revision:      https://reviews.freebsd.org/D5185
  
  295741
      hyperv/hn: Add option to allow sharing TX taskq between hn instances
  
      It is off by default.  This eases further experimenting on this driver.
  
      Reviewed by:        adrian
      Approved by:        adrian (mentor)
      MFC after:  1 week
      Sponsored by:       Microsoft OSTC
      Differential Revision:      https://reviews.freebsd.org/D5272
  
  295742
      hyperv/hn: Always do transmission scheduling.
  
      This one gives the best performance so far.
  
      Reviewed by:        adrian
      Approved by:        adrian (mentor)
      MFC after:  1 week
      Sponsored by:       Microsoft OSTC
      Differential Revision:      https://reviews.freebsd.org/D5273

Modified:
  stable/10/sys/dev/hyperv/netvsc/hv_net_vsc.h
  stable/10/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c
Directory Properties:
  stable/10/   (props changed)

Modified: stable/10/sys/dev/hyperv/netvsc/hv_net_vsc.h
==============================================================================
--- stable/10/sys/dev/hyperv/netvsc/hv_net_vsc.h        Mon Jun 13 06:38:46 2016        (r301859)
+++ stable/10/sys/dev/hyperv/netvsc/hv_net_vsc.h        Mon Jun 13 07:03:00 2016        (r301860)
@@ -1031,7 +1031,6 @@ typedef struct hn_softc {
        struct task     hn_txeof_task;
 
        struct lro_ctrl hn_lro;
-       int             hn_lro_hiwat;
 
        /* Trust csum verification on host side */
        int             hn_trust_hcsum; /* HN_TRUST_HCSUM_ */

Modified: stable/10/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c
==============================================================================
--- stable/10/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c     Mon Jun 13 06:38:46 2016        (r301859)
+++ stable/10/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c     Mon Jun 13 07:03:00 2016        (r301860)
@@ -176,14 +176,11 @@ struct hn_txdesc {
 #define HN_CSUM_ASSIST_WIN8    (CSUM_TCP)
 #define HN_CSUM_ASSIST         (CSUM_IP | CSUM_UDP | CSUM_TCP)
 
-/* XXX move to netinet/tcp_lro.h */
-#define HN_LRO_HIWAT_MAX                               65535
-#define HN_LRO_HIWAT_DEF                               HN_LRO_HIWAT_MAX
+#define HN_LRO_LENLIM_DEF              (25 * ETHERMTU)
 /* YYY 2*MTU is a bit rough, but should be good enough. */
-#define HN_LRO_HIWAT_MTULIM(ifp)                       (2 * (ifp)->if_mtu)
-#define HN_LRO_HIWAT_ISVALID(sc, hiwat)                        \
-    ((hiwat) >= HN_LRO_HIWAT_MTULIM((sc)->hn_ifp) ||   \
-     (hiwat) <= HN_LRO_HIWAT_MAX)
+#define HN_LRO_LENLIM_MIN(ifp)         (2 * (ifp)->if_mtu)
+
+#define HN_LRO_ACKCNT_DEF              1
 
 /*
  * Be aware that this sleepable mutex will exhibit WITNESS errors when
@@ -241,6 +238,11 @@ TUNABLE_INT("dev.hn.lro_entry_count", &h
 #endif
 #endif
 
+static int hn_share_tx_taskq = 0;
+TUNABLE_INT("hw.hn.share_tx_taskq", &hn_share_tx_taskq);
+
+static struct taskqueue        *hn_tx_taskq;
+
 /*
  * Forward declarations
  */
@@ -253,8 +255,9 @@ static void hn_start(struct ifnet *ifp);
 static void hn_start_txeof(struct ifnet *ifp);
 static int hn_ifmedia_upd(struct ifnet *ifp);
 static void hn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr);
-#ifdef HN_LRO_HIWAT
-static int hn_lro_hiwat_sysctl(SYSCTL_HANDLER_ARGS);
+#if __FreeBSD_version >= 1100099
+static int hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS);
+static int hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS);
 #endif
 static int hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS);
 static int hn_tx_chimney_size_sysctl(SYSCTL_HANDLER_ARGS);
@@ -265,15 +268,6 @@ static void hn_start_taskfunc(void *xsc,
 static void hn_txeof_taskfunc(void *xsc, int pending);
 static int hn_encap(struct hn_softc *, struct hn_txdesc *, struct mbuf **);
 
-static __inline void
-hn_set_lro_hiwat(struct hn_softc *sc, int hiwat)
-{
-       sc->hn_lro_hiwat = hiwat;
-#ifdef HN_LRO_HIWAT
-       sc->hn_lro.lro_hiwat = sc->hn_lro_hiwat;
-#endif
-}
-
 static int
 hn_ifmedia_upd(struct ifnet *ifp __unused)
 {
@@ -358,7 +352,6 @@ netvsc_attach(device_t dev)
        bzero(sc, sizeof(hn_softc_t));
        sc->hn_unit = unit;
        sc->hn_dev = dev;
-       sc->hn_lro_hiwat = HN_LRO_HIWAT_DEF;
        sc->hn_direct_tx_size = hn_direct_tx_size;
        if (hn_trust_hosttcp)
                sc->hn_trust_hcsum |= HN_TRUST_HCSUM_TCP;
@@ -367,10 +360,14 @@ netvsc_attach(device_t dev)
        if (hn_trust_hostip)
                sc->hn_trust_hcsum |= HN_TRUST_HCSUM_IP;
 
-       sc->hn_tx_taskq = taskqueue_create_fast("hn_tx", M_WAITOK,
-           taskqueue_thread_enqueue, &sc->hn_tx_taskq);
-       taskqueue_start_threads(&sc->hn_tx_taskq, 1, PI_NET, "%s tx",
-           device_get_nameunit(dev));
+       if (hn_tx_taskq == NULL) {
+               sc->hn_tx_taskq = taskqueue_create_fast("hn_tx", M_WAITOK,
+                   taskqueue_thread_enqueue, &sc->hn_tx_taskq);
+               taskqueue_start_threads(&sc->hn_tx_taskq, 1, PI_NET, "%s tx",
+                   device_get_nameunit(dev));
+       } else {
+               sc->hn_tx_taskq = hn_tx_taskq;
+       }
        TASK_INIT(&sc->hn_start_task, 0, hn_start_taskfunc, sc);
        TASK_INIT(&sc->hn_txeof_task, 0, hn_txeof_taskfunc, sc);
 
@@ -442,8 +439,9 @@ netvsc_attach(device_t dev)
        /* Driver private LRO settings */
        sc->hn_lro.ifp = ifp;
 #endif
-#ifdef HN_LRO_HIWAT
-       sc->hn_lro.lro_hiwat = sc->hn_lro_hiwat;
+#if __FreeBSD_version >= 1100099
+       sc->hn_lro.lro_length_lim = HN_LRO_LENLIM_DEF;
+       sc->hn_lro.lro_ackcnt_lim = HN_LRO_ACKCNT_DEF;
 #endif
 #endif /* INET || INET6 */
 
@@ -471,6 +469,13 @@ netvsc_attach(device_t dev)
            hn_tx_chimney_size < sc->hn_tx_chimney_max)
                sc->hn_tx_chimney_size = hn_tx_chimney_size;
 
+       /*
+        * Always schedule transmission instead of trying
+        * to do direct transmission.  This one gives the
+        * best performance so far.
+        */
+       sc->hn_sched_tx = 1;
+
        ctx = device_get_sysctl_ctx(dev);
        child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
 
@@ -480,10 +485,13 @@ netvsc_attach(device_t dev)
            CTLFLAG_RW, &sc->hn_lro.lro_flushed, 0, "LRO flushed");
        SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "lro_tried",
            CTLFLAG_RW, &sc->hn_lro_tried, "# of LRO tries");
-#ifdef HN_LRO_HIWAT
-       SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_hiwat",
-           CTLTYPE_INT | CTLFLAG_RW, sc, 0, hn_lro_hiwat_sysctl,
-           "I", "LRO high watermark");
+#if __FreeBSD_version >= 1100099
+       SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_length_lim",
+           CTLTYPE_UINT | CTLFLAG_RW, sc, 0, hn_lro_lenlim_sysctl, "IU",
+           "Max # of data bytes to be aggregated by LRO");
+       SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_ackcnt_lim",
+           CTLTYPE_INT | CTLFLAG_RW, sc, 0, hn_lro_ackcnt_sysctl, "I",
+           "Max # of ACKs to be aggregated by LRO");
 #endif
        SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hosttcp",
            CTLTYPE_INT | CTLFLAG_RW, sc, HN_TRUST_HCSUM_TCP,
@@ -616,7 +624,8 @@ netvsc_detach(device_t dev)
 
        taskqueue_drain(sc->hn_tx_taskq, &sc->hn_start_task);
        taskqueue_drain(sc->hn_tx_taskq, &sc->hn_txeof_task);
-       taskqueue_free(sc->hn_tx_taskq);
+       if (sc->hn_tx_taskq != hn_tx_taskq)
+               taskqueue_free(sc->hn_tx_taskq);
 
        ifmedia_removeall(&sc->hn_media);
 #if defined(INET) || defined(INET6)
@@ -1412,12 +1421,15 @@ hn_ioctl(struct ifnet *ifp, u_long cmd, 
 
                /* Obtain and record requested MTU */
                ifp->if_mtu = ifr->ifr_mtu;
+
+#if __FreeBSD_version >= 1100099
                /*
-                * Make sure that LRO high watermark is still valid,
-                * after MTU change (the 2*MTU limit).
+                * Make sure that LRO aggregation length limit is still
+                * valid, after the MTU change.
                 */
-               if (!HN_LRO_HIWAT_ISVALID(sc, sc->hn_lro_hiwat))
-                       hn_set_lro_hiwat(sc, HN_LRO_HIWAT_MTULIM(ifp));
+               if (sc->hn_lro.lro_length_lim < HN_LRO_LENLIM_MIN(ifp))
+                       sc->hn_lro.lro_length_lim = HN_LRO_LENLIM_MIN(ifp);
+#endif
 
                do {
                        NV_LOCK(sc);
@@ -1724,26 +1736,55 @@ hn_watchdog(struct ifnet *ifp)
 }
 #endif
 
-#ifdef HN_LRO_HIWAT
+#if __FreeBSD_version >= 1100099
+
 static int
-hn_lro_hiwat_sysctl(SYSCTL_HANDLER_ARGS)
+hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS)
 {
        struct hn_softc *sc = arg1;
-       int hiwat, error;
+       unsigned int lenlim;
+       int error;
 
-       hiwat = sc->hn_lro_hiwat;
-       error = sysctl_handle_int(oidp, &hiwat, 0, req);
+       lenlim = sc->hn_lro.lro_length_lim;
+       error = sysctl_handle_int(oidp, &lenlim, 0, req);
        if (error || req->newptr == NULL)
                return error;
 
-       if (!HN_LRO_HIWAT_ISVALID(sc, hiwat))
+       if (lenlim < HN_LRO_LENLIM_MIN(sc->hn_ifp) ||
+           lenlim > TCP_LRO_LENGTH_MAX)
                return EINVAL;
 
-       if (sc->hn_lro_hiwat != hiwat)
-               hn_set_lro_hiwat(sc, hiwat);
+       sc->hn_lro.lro_length_lim = lenlim;
        return 0;
 }
-#endif /* HN_LRO_HIWAT */
+
+static int
+hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS)
+{
+       struct hn_softc *sc = arg1;
+       int ackcnt, error;
+
+       /*
+        * lro_ackcnt_lim is append count limit,
+        * +1 to turn it into aggregation limit.
+        */
+       ackcnt = sc->hn_lro.lro_ackcnt_lim + 1;
+       error = sysctl_handle_int(oidp, &ackcnt, 0, req);
+       if (error || req->newptr == NULL)
+               return error;
+
+       if (ackcnt < 2 || ackcnt > (TCP_LRO_ACKCNT_MAX + 1))
+               return EINVAL;
+
+       /*
+        * Convert aggregation limit back to append
+        * count limit.
+        */
+       sc->hn_lro.lro_ackcnt_lim = ackcnt - 1;
+       return 0;
+}
+
+#endif
 
 static int
 hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS)
@@ -2029,6 +2070,28 @@ hn_txeof_taskfunc(void *xsc, int pending
        NV_UNLOCK(sc);
 }
 
+static void
+hn_tx_taskq_create(void *arg __unused)
+{
+       if (!hn_share_tx_taskq)
+               return;
+
+       hn_tx_taskq = taskqueue_create_fast("hn_tx", M_WAITOK,
+           taskqueue_thread_enqueue, &hn_tx_taskq);
+       taskqueue_start_threads(&hn_tx_taskq, 1, PI_NET, "hn tx");
+}
+SYSINIT(hn_txtq_create, SI_SUB_DRIVERS, SI_ORDER_FIRST,
+    hn_tx_taskq_create, NULL);
+
+static void
+hn_tx_taskq_destroy(void *arg __unused)
+{
+       if (hn_tx_taskq != NULL)
+               taskqueue_free(hn_tx_taskq);
+}
+SYSUNINIT(hn_txtq_destroy, SI_SUB_DRIVERS, SI_ORDER_FIRST,
+    hn_tx_taskq_destroy, NULL);
+
 static device_method_t netvsc_methods[] = {
         /* Device interface */
         DEVMETHOD(device_probe,         netvsc_probe),
_______________________________________________
[email protected] mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-stable-10
To unsubscribe, send any mail to "[email protected]"

Reply via email to