Author: sbruno
Date: Tue Jan 24 14:48:32 2017
New Revision: 312696
URL: https://svnweb.freebsd.org/changeset/base/312696

Log:
  iflib:
     Add internal tracking of smp startup status to reliably figure out
     what methods are to be used to get gtaskqueue up and running.
  
  e1000:
     Calculating this pointer gives undefined behaviour when (last == -1)
     (it is before the buffer).  The pointer is always followed.  Panics
     occurred when it points to an unmapped page.  Otherwise, the pointed-to
     garbage tends to not have the E1000_TXD_STAT_DD bit set in it, so in the
     broken case the loop was usually null and the function just returned, and
     this was acidentally correct.
  
  Submitted by: bde
  Reviewed by:  Matt Macy <mm...@nextbsd.org>

Modified:
  head/sys/dev/e1000/em_txrx.c
  head/sys/kern/subr_gtaskqueue.c
  head/sys/net/iflib.c
  head/sys/sys/gtaskqueue.h

Modified: head/sys/dev/e1000/em_txrx.c
==============================================================================
--- head/sys/dev/e1000/em_txrx.c        Tue Jan 24 12:15:10 2017        
(r312695)
+++ head/sys/dev/e1000/em_txrx.c        Tue Jan 24 14:48:32 2017        
(r312696)
@@ -408,10 +408,13 @@ em_isc_txd_credits_update(void *arg, uin
        cidx = cidx_init;
        buf = &txr->tx_buffers[cidx];
        tx_desc = &txr->tx_base[cidx];
-        last = buf->eop;
+       last = buf->eop;
+       if (last == -1)
+               return (processed);
        eop_desc = &txr->tx_base[last];
 
-       DPRINTF(iflib_get_dev(adapter->ctx), "credits_update: cidx_init=%d 
clear=%d last=%d\n",
+       DPRINTF(iflib_get_dev(adapter->ctx),
+                     "credits_update: cidx_init=%d clear=%d last=%d\n",
                      cidx_init, clear, last);
        /*
         * What this does is get the index of the
@@ -420,7 +423,7 @@ em_isc_txd_credits_update(void *arg, uin
         * simple comparison on the inner while loop.
         */
        if (++last == scctx->isc_ntxd[0])
-            last = 0;
+               last = 0;
        done = last;
 
 
@@ -436,7 +439,7 @@ em_isc_txd_credits_update(void *arg, uin
                        tx_desc++;
                        buf++;
                        processed++;
-                 
+
                        /* wrap the ring ? */
                        if (++cidx == scctx->isc_ntxd[0]) {
                                cidx = 0;

Modified: head/sys/kern/subr_gtaskqueue.c
==============================================================================
--- head/sys/kern/subr_gtaskqueue.c     Tue Jan 24 12:15:10 2017        
(r312695)
+++ head/sys/kern/subr_gtaskqueue.c     Tue Jan 24 14:48:32 2017        
(r312696)
@@ -630,6 +630,29 @@ taskqgroup_find(struct taskqgroup *qgrou
 
        return (idx);
 }
+/*
+ * smp_started is unusable since it is not set for UP kernels or even for
+ * SMP kernels when there is 1 CPU.  This is usually handled by adding a
+ * (mp_ncpus == 1) test, but that would be broken here since we need to
+ * to synchronize with the SI_SUB_SMP ordering.  Even in the pure SMP case
+ * smp_started only gives a fuzzy ordering relative to SI_SUB_SMP.
+ *
+ * So maintain our own flag.  It must be set after all CPUs are started
+ * and before SI_SUB_SMP:SI_ORDER_ANY so that the SYSINIT for delayed
+ * adjustment is properly delayed.  SI_ORDER_FOURTH is clearly before
+ * SI_ORDER_ANY and unclearly after the CPUs are started.  It would be
+ * simpler for adjustment to pass a flag indicating if it is delayed.
+ */ 
+static int tqg_smp_started;
+
+static void
+tqg_record_smp_started(void *arg)
+{
+       tqg_smp_started = 1;
+}
+
+SYSINIT(tqg_record_smp_started, SI_SUB_SMP, SI_ORDER_FOURTH,
+       tqg_record_smp_started, NULL);
 
 void
 taskqgroup_attach(struct taskqgroup *qgroup, struct grouptask *gtask,
@@ -647,7 +670,7 @@ taskqgroup_attach(struct taskqgroup *qgr
        qgroup->tqg_queue[qid].tgc_cnt++;
        LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
        gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
-       if (irq != -1 && (smp_started || mp_ncpus == 1)) {
+       if (irq != -1 && tqg_smp_started ) {
                gtask->gt_cpu = qgroup->tqg_queue[qid].tgc_cpu;
                CPU_ZERO(&mask);
                CPU_SET(qgroup->tqg_queue[qid].tgc_cpu, &mask);
@@ -697,7 +720,7 @@ taskqgroup_attach_cpu(struct taskqgroup 
        gtask->gt_irq = irq;
        gtask->gt_cpu = cpu;
        mtx_lock(&qgroup->tqg_lock);
-       if (smp_started || mp_ncpus == 1) {
+       if (tqg_smp_started)
                for (i = 0; i < qgroup->tqg_cnt; i++)
                        if (qgroup->tqg_queue[i].tgc_cpu == cpu) {
                                qid = i;
@@ -731,7 +754,7 @@ taskqgroup_attach_cpu_deferred(struct ta
        qid = -1;
        irq = gtask->gt_irq;
        cpu = gtask->gt_cpu;
-       MPASS(smp_started || mp_ncpus == 1);
+       MPASS(tqg_smp_started);
        mtx_lock(&qgroup->tqg_lock);
        for (i = 0; i < qgroup->tqg_cnt; i++)
                if (qgroup->tqg_queue[i].tgc_cpu == cpu) {
@@ -824,9 +847,10 @@ _taskqgroup_adjust(struct taskqgroup *qg
 
        mtx_assert(&qgroup->tqg_lock, MA_OWNED);
 
-       if (cnt < 1 || cnt * stride > mp_ncpus || (!smp_started && (mp_ncpus != 
1))) {
-               printf("taskqgroup_adjust failed cnt: %d stride: %d mp_ncpus: 
%d smp_started: %d\n",
-                          cnt, stride, mp_ncpus, smp_started);
+       if (cnt < 1 || cnt * stride > mp_ncpus || !tqg_smp_started) {
+               printf("%s: failed cnt: %d stride: %d "
+                      "mp_ncpus: %d smp_started: %d\n",
+                       __func__, cnt, stride, mp_ncpus, smp_started);
                return (EINVAL);
        }
        if (qgroup->tqg_adjusting) {

Modified: head/sys/net/iflib.c
==============================================================================
--- head/sys/net/iflib.c        Tue Jan 24 12:15:10 2017        (r312695)
+++ head/sys/net/iflib.c        Tue Jan 24 14:48:32 2017        (r312696)
@@ -1193,13 +1193,36 @@ iflib_dma_free_multi(iflib_dma_info_t *d
                iflib_dma_free(*dmaiter);
 }
 
+#ifdef EARLY_AP_STARTUP
+static const int iflib_started = 1;
+#else
+/*
+ * We used to abuse the smp_started flag to decide if the queues have been
+ * fully initialized (by late taskqgroup_adjust() calls in a SYSINIT()).
+ * That gave bad races, since the SYSINIT() runs strictly after smp_started
+ * is set.  Run a SYSINIT() strictly after that to just set a usable
+ * completion flag.
+ */
+
+static int iflib_started;
+
+static void
+iflib_record_started(void *arg)
+{
+       iflib_started = 1;
+}
+
+SYSINIT(iflib_record_started, SI_SUB_SMP + 1, SI_ORDER_FIRST,
+       iflib_record_started, NULL);
+#endif
+
 static int
 iflib_fast_intr(void *arg)
 {
        iflib_filter_info_t info = arg;
        struct grouptask *gtask = info->ifi_task;
 
-       if (!smp_started && mp_ncpus > 1)
+       if (!iflib_started)
                return (FILTER_HANDLED);
 
        DBG_COUNTER_INC(fast_intrs);
@@ -3728,7 +3751,16 @@ iflib_device_register(device_t dev, void
                device_printf(dev, "qset structure setup failed %d\n", err);
                goto fail_queues;
        }
-
+       /*
+        * Group taskqueues aren't properly set up until SMP is started,
+        * so we disable interrupts until we can handle them post
+        * SI_SUB_SMP.
+        *
+        * XXX: disabling interrupts doesn't actually work, at least for
+        * the non-MSI case.  When they occur before SI_SUB_SMP completes,
+        * we do null handling and depend on this not causing too large an
+        * interrupt storm.
+        */
        IFDI_INTR_DISABLE(ctx);
        if (msix > 1 && (err = IFDI_MSIX_INTR_ASSIGN(ctx, msix)) != 0) {
                device_printf(dev, "IFDI_MSIX_INTR_ASSIGN failed %d\n", err);
@@ -4556,13 +4588,6 @@ iflib_legacy_setup(if_ctx_t ctx, driver_
        void *q;
        int err;
 
-       /*
-        * group taskqueues aren't properly set up until SMP is started
-        * so we disable interrupts until we can handle them post
-        * SI_SUB_SMP
-        */
-       IFDI_INTR_DISABLE(ctx);
-
        q = &ctx->ifc_rxqs[0];
        info = &rxq[0].ifr_filter_info;
        gtask = &rxq[0].ifr_task;

Modified: head/sys/sys/gtaskqueue.h
==============================================================================
--- head/sys/sys/gtaskqueue.h   Tue Jan 24 12:15:10 2017        (r312695)
+++ head/sys/sys/gtaskqueue.h   Tue Jan 24 14:48:32 2017        (r312696)
@@ -81,7 +81,7 @@ int   taskqgroup_adjust(struct taskqgroup 
 extern struct taskqgroup *qgroup_##name
 
 
-#if (!defined(SMP) || defined(EARLY_AP_STARTUP))
+#ifdef EARLY_AP_STARTUP
 #define TASKQGROUP_DEFINE(name, cnt, stride)                           \
                                                                        \
 struct taskqgroup *qgroup_##name;                                      \
@@ -95,7 +95,8 @@ taskqgroup_define_##name(void *arg)                           
        
                                                                        \
 SYSINIT(taskqgroup_##name, SI_SUB_INIT_IF, SI_ORDER_FIRST,             \
        taskqgroup_define_##name, NULL)
-#else /* SMP && !EARLY_AP_STARTUP */
+
+#else /* !EARLY_AP_STARTUP */
 #define TASKQGROUP_DEFINE(name, cnt, stride)                           \
                                                                        \
 struct taskqgroup *qgroup_##name;                                      \
@@ -104,15 +105,6 @@ static void                                                
                \
 taskqgroup_define_##name(void *arg)                                    \
 {                                                                      \
        qgroup_##name = taskqgroup_create(#name);                       \
-       /* Adjustment will be null unless smp_cpus == 1. */             \
-       /*                                                              \
-        * XXX this was intended to fix the smp_cpus == 1 case, but     \
-        * doesn't actually work for that.  It gives thes same strange  \
-        * panic as adjustment at SI_SUB_INIT_IF:SI_ORDER_ANY for a     \
-        * device that works with a pure UP kernel.                     \
-        */                                                             \
-       /* XXX this code is common now, so should not be ifdefed. */    \
-       taskqgroup_adjust(qgroup_##name, (cnt), (stride));              \
 }                                                                      \
                                                                        \
 SYSINIT(taskqgroup_##name, SI_SUB_INIT_IF, SI_ORDER_FIRST,             \
@@ -121,17 +113,13 @@ SYSINIT(taskqgroup_##name, SI_SUB_INIT_I
 static void                                                            \
 taskqgroup_adjust_##name(void *arg)                                    \
 {                                                                      \
-       /*                                                              \
-        * Adjustment when smp_cpus > 1 only works accidentally         \
-        * (when there is no device interrupt before adjustment).       \
-        */                                                             \
        taskqgroup_adjust(qgroup_##name, (cnt), (stride));              \
 }                                                                      \
                                                                        \
 SYSINIT(taskqgroup_adj_##name, SI_SUB_SMP, SI_ORDER_ANY,               \
        taskqgroup_adjust_##name, NULL);                                \
 
-#endif /* !SMP || EARLY_AP_STARTUP */
+#endif /* EARLY_AP_STARTUP */
 
 TASKQGROUP_DECLARE(net);
 
_______________________________________________
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to