Here is a new diff to make ix(4) mpsafe.  Should no longer get stuck
in the OACTIVE state.  Tests more than welcome.


Index: if_ix.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/if_ix.c,v
retrieving revision 1.129
diff -u -p -r1.129 if_ix.c
--- if_ix.c     25 Nov 2015 03:09:59 -0000      1.129
+++ if_ix.c     3 Dec 2015 16:50:26 -0000
@@ -210,8 +210,6 @@ ixgbe_attach(struct device *parent, stru
        sc->osdep.os_sc = sc;
        sc->osdep.os_pa = *pa;
 
-       mtx_init(&sc->rx_mtx, IPL_NET);
-
        /* Set up the timer callout */
        timeout_set(&sc->timer, ixgbe_local_timer, sc);
        timeout_set(&sc->rx_refill, ixgbe_rxrefill, sc);
@@ -385,6 +383,16 @@ ixgbe_start(struct ifnet * ifp)
                if (ixgbe_encap(txr, m_head)) {
                        ifq_deq_rollback(&ifp->if_snd, m_head);
                        ifq_set_oactive(&ifp->if_snd);
+                       /*
+                        *  Make sure there are still packets on the
+                        *  ring.  The interrupt handler may have
+                        *  cleaned up the ring before we were able to
+                        *  set the IF_OACTIVE flag.
+                        */
+                       if (txr->tx_avail == sc->num_tx_desc) {
+                               ifq_clr_oactive(&ifp->if_snd);
+                               continue;
+                       }
                        break;
                }
 
@@ -527,10 +535,7 @@ ixgbe_watchdog(struct ifnet * ifp)
 
        /*
         * The timer is set to 5 every time ixgbe_start() queues a packet.
-        * Then ixgbe_txeof() keeps resetting to 5 as long as it cleans at
-        * least one descriptor.
-        * Finally, anytime all descriptors are clean the timer is
-        * set to 0.
+        * Anytime all descriptors are clean the timer is set to 0.
         */
        for (i = 0; i < sc->num_queues; i++, txr++) {
                if (txr->watchdog_timer == 0 || --txr->watchdog_timer)
@@ -862,7 +867,7 @@ ixgbe_intr(void *arg)
        struct tx_ring  *txr = sc->tx_rings;
        struct ixgbe_hw *hw = &sc->hw;
        uint32_t         reg_eicr;
-       int              i, refill = 0, was_active = 0;
+       int              i, refill = 0;
 
        reg_eicr = IXGBE_READ_REG(&sc->hw, IXGBE_EICR);
        if (reg_eicr == 0) {
@@ -872,11 +877,8 @@ ixgbe_intr(void *arg)
 
        if (ISSET(ifp->if_flags, IFF_RUNNING)) {
                ixgbe_rxeof(que);
-               refill = 1;
-
-               if (ifq_is_oactive(&ifp->if_snd))
-                       was_active = 1;
                ixgbe_txeof(txr);
+               refill = 1;
        }
 
        if (refill) {
@@ -892,6 +894,8 @@ ixgbe_intr(void *arg)
        if (reg_eicr & IXGBE_EICR_LSC) {
                KERNEL_LOCK();
                ixgbe_update_link_status(sc);
+               if (!IFQ_IS_EMPTY(&ifp->if_snd))
+                       ixgbe_start(ifp);
                KERNEL_UNLOCK();
        }
 
@@ -913,12 +917,6 @@ ixgbe_intr(void *arg)
                }
        }
 
-       if (was_active) {
-               KERNEL_LOCK();
-               ixgbe_start(ifp);
-               KERNEL_UNLOCK();
-       }
-
        /* Check for fan failure */
        if ((hw->device_id == IXGBE_DEV_ID_82598AT) &&
            (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
@@ -1061,17 +1059,9 @@ ixgbe_encap(struct tx_ring *txr, struct 
                cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
 #endif
 
-       /*
-        * Force a cleanup if number of TX descriptors
-        * available is below the threshold. If it fails
-        * to get above, then abort transmit.
-        */
-       if (txr->tx_avail <= IXGBE_TX_CLEANUP_THRESHOLD) {
-               ixgbe_txeof(txr);
-               /* Make sure things have improved */
-               if (txr->tx_avail <= IXGBE_TX_OP_THRESHOLD)
-                       return (ENOBUFS);
-       }
+       /* Check that we have at least the minimal number of TX descriptors. */
+       if (txr->tx_avail <= IXGBE_TX_OP_THRESHOLD)
+               return (ENOBUFS);
 
        /*
         * Important to capture the first descriptor
@@ -1135,8 +1125,6 @@ ixgbe_encap(struct tx_ring *txr, struct 
 
        txd->read.cmd_type_len |=
            htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
-       txr->tx_avail -= map->dm_nsegs;
-       txr->next_avail_desc = i;
 
        txbuf->m_head = m_head;
        /*
@@ -1154,6 +1142,11 @@ ixgbe_encap(struct tx_ring *txr, struct 
        txbuf = &txr->tx_buffers[first];
        txbuf->eop_index = last;
 
+       membar_producer();
+
+       atomic_sub_int(&txr->tx_avail, map->dm_nsegs);
+       txr->next_avail_desc = i;
+
        ++txr->tx_packets;
        return (0);
 
@@ -1322,6 +1315,10 @@ ixgbe_stop(void *arg)
        /* reprogram the RAR[0] in case user changed it. */
        ixgbe_set_rar(&sc->hw, 0, sc->hw.mac.addr, 0, IXGBE_RAH_AV);
 
+       intr_barrier(sc->tag);
+
+       KASSERT((ifp->if_flags & IFF_RUNNING) == 0);
+
        /* Should we really clear all structures on stop? */
        ixgbe_free_transmit_structures(sc);
        ixgbe_free_receive_structures(sc);
@@ -2202,11 +2199,13 @@ ixgbe_tx_ctx_setup(struct tx_ring *txr, 
        tx_buffer->m_head = NULL;
        tx_buffer->eop_index = -1;
 
+       membar_producer();
+
        /* We've consumed the first desc, adjust counters */
        if (++ctxd == sc->num_tx_desc)
                ctxd = 0;
        txr->next_avail_desc = ctxd;
-       --txr->tx_avail;
+       atomic_dec_int(&txr->tx_avail);
 
        return (0);
 }
@@ -2320,10 +2319,12 @@ ixgbe_tso_setup(struct tx_ring *txr, str
 
        TXD->seqnum_seed = htole32(0);
 
+       membar_producer();
+
        if (++ctxd == sc->num_tx_desc)
                ctxd = 0;
 
-       txr->tx_avail--;
+       atomic_dec_int(&txr->tx_avail);
        txr->next_avail_desc = ctxd;
        *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
        *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
@@ -2345,6 +2346,7 @@ ixgbe_txeof(struct tx_ring *txr)
        struct ix_softc                 *sc = txr->sc;
        struct ifnet                    *ifp = &sc->arpcom.ac_if;
        uint32_t                         first, last, done, processed;
+       uint32_t                         num_avail;
        struct ixgbe_tx_buf             *tx_buffer;
        struct ixgbe_legacy_tx_desc *tx_desc, *eop_desc;
 
@@ -2356,23 +2358,19 @@ ixgbe_txeof(struct tx_ring *txr)
                return FALSE;
        }
 
-       KERNEL_LOCK();
+       membar_consumer();
 
        processed = 0;
        first = txr->next_to_clean;
        /* was the txt queue cleaned up in the meantime */
-       if (txr->tx_buffers == NULL) {
-               KERNEL_UNLOCK();
+       if (txr->tx_buffers == NULL)
                return FALSE;
-       }
        tx_buffer = &txr->tx_buffers[first];
        /* For cleanup we just use legacy struct */
        tx_desc = (struct ixgbe_legacy_tx_desc *)&txr->tx_base[first];
        last = tx_buffer->eop_index;
-       if (last == -1) {
-               KERNEL_UNLOCK();
+       if (last == -1)
                return FALSE;
-       }
        eop_desc = (struct ixgbe_legacy_tx_desc *)&txr->tx_base[last];
 
        /*
@@ -2394,7 +2392,6 @@ ixgbe_txeof(struct tx_ring *txr)
                        tx_desc->upper.data = 0;
                        tx_desc->lower.data = 0;
                        tx_desc->buffer_addr = 0;
-                       ++txr->tx_avail;
                        ++processed;
 
                        if (tx_buffer->m_head) {
@@ -2436,31 +2433,24 @@ ixgbe_txeof(struct tx_ring *txr)
 
        txr->next_to_clean = first;
 
+       num_avail = atomic_add_int_nv(&txr->tx_avail, processed);
+
+       /* All clean, turn off the timer */
+       if (num_avail == sc->num_tx_desc)
+               ifp->if_timer = 0;
+
        /*
         * If we have enough room, clear IFF_OACTIVE to tell the stack that
-        * it is OK to send packets. If there are no pending descriptors,
-        * clear the timeout. Otherwise, if some descriptors have been freed,
-        * restart the timeout.
+        * it is OK to send packets.
         */
-       if (txr->tx_avail > IXGBE_TX_CLEANUP_THRESHOLD) {
+       if (ifq_is_oactive(&ifp->if_snd) &&
+           num_avail > IXGBE_TX_OP_THRESHOLD) {
                ifq_clr_oactive(&ifp->if_snd);
-
-               /* If all are clean turn off the timer */
-               if (txr->tx_avail == sc->num_tx_desc) {
-                       ifp->if_timer = 0;
-                       txr->watchdog_timer = 0;
-                       KERNEL_UNLOCK();
-                       return FALSE;
-               }
-               /* Some were cleaned, so reset timer */
-               else if (processed) {
-                       ifp->if_timer = IXGBE_TX_TIMEOUT;
-                       txr->watchdog_timer = IXGBE_TX_TIMEOUT;
-               }
+               KERNEL_LOCK();
+               ixgbe_start(ifp);
+               KERNEL_UNLOCK();
        }
 
-       KERNEL_UNLOCK();
-
        return TRUE;
 }
 
@@ -2610,8 +2600,6 @@ ixgbe_rxfill(struct rx_ring *rxr)
        u_int            slots;
        int              i;
 
-       mtx_enter(&sc->rx_mtx);
-
        i = rxr->last_desc_filled;
        for (slots = if_rxr_get(&rxr->rx_ring, sc->num_rx_desc);
            slots > 0; slots--) {
@@ -2627,8 +2615,6 @@ ixgbe_rxfill(struct rx_ring *rxr)
 
        if_rxr_put(&rxr->rx_ring, slots);
 
-       mtx_leave(&sc->rx_mtx);
-
        return (post);
 }
 
@@ -2796,10 +2782,8 @@ ixgbe_free_receive_structures(struct ix_
        struct rx_ring *rxr;
        int             i;
 
-       mtx_enter(&sc->rx_mtx);
        for (i = 0, rxr = sc->rx_rings; i < sc->num_queues; i++, rxr++)
                if_rxr_init(&rxr->rx_ring, 0, 0);
-       mtx_leave(&sc->rx_mtx);
 
        for (i = 0, rxr = sc->rx_rings; i < sc->num_queues; i++, rxr++)
                ixgbe_free_receive_buffers(rxr);
@@ -2853,7 +2837,6 @@ ixgbe_rxeof(struct ix_queue *que)
        struct rx_ring          *rxr = que->rxr;
        struct ifnet            *ifp = &sc->arpcom.ac_if;
        struct mbuf_list         ml = MBUF_LIST_INITIALIZER();
-       struct mbuf_list         free_ml = MBUF_LIST_INITIALIZER();
        struct mbuf             *mp, *sendmp;
        uint8_t                  eop = 0;
        uint16_t                 len, vtag;
@@ -2866,7 +2849,6 @@ ixgbe_rxeof(struct ix_queue *que)
        if (!ISSET(ifp->if_flags, IFF_RUNNING))
                return FALSE;
 
-       mtx_enter(&sc->rx_mtx);
        i = rxr->next_to_check;
        while (if_rxr_inuse(&rxr->rx_ring) > 0) {
                bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
@@ -2901,11 +2883,11 @@ ixgbe_rxeof(struct ix_queue *que)
                        sc->dropped_pkts++;
 
                        if (rxbuf->fmp) {
-                               ml_enqueue(&free_ml, rxbuf->fmp);
+                               m_freem(rxbuf->fmp);
                                rxbuf->fmp = NULL;
                        }
 
-                       ml_enqueue(&free_ml, mp);
+                       m_freem(mp);
                        rxbuf->buf = NULL;
                        goto next_desc;
                }
@@ -2983,9 +2965,6 @@ next_desc:
                        i = 0;
        }
        rxr->next_to_check = i;
-       mtx_leave(&sc->rx_mtx);
-
-       ml_purge(&free_ml);
 
        if_input(ifp, &ml);
 
Index: if_ix.h
===================================================================
RCS file: /cvs/src/sys/dev/pci/if_ix.h,v
retrieving revision 1.29
diff -u -p -r1.29 if_ix.h
--- if_ix.h     11 Sep 2015 13:02:28 -0000      1.29
+++ if_ix.h     3 Dec 2015 16:50:26 -0000
@@ -77,10 +77,9 @@
 #define IXGBE_TX_TIMEOUT                   5   /* set to 5 seconds */
 
 /*
- * This parameters control when the driver calls the routine to reclaim
- * transmit descriptors.
+ * This parameter controls the minimum number of available transmit
+ * descriptors needed before we attempt transmission of a packet.
  */
-#define IXGBE_TX_CLEANUP_THRESHOLD     (sc->num_tx_desc / 16)
 #define IXGBE_TX_OP_THRESHOLD          (sc->num_tx_desc / 32)
 
 #define IXGBE_MAX_FRAME_SIZE   9216
@@ -170,7 +169,7 @@ struct tx_ring {
        union ixgbe_adv_tx_desc *tx_base;
        struct ixgbe_tx_buf     *tx_buffers;
        struct ixgbe_dma_alloc  txdma;
-       volatile uint16_t       tx_avail;
+       volatile uint32_t       tx_avail;
        uint32_t                next_avail_desc;
        uint32_t                next_to_clean;
        enum {
@@ -277,7 +276,6 @@ struct ix_softc {
         * Receive rings:
         *      Allocated at run time, an array of rings.
         */
-       struct mutex            rx_mtx;
        struct rx_ring          *rx_rings;
        uint64_t                que_mask;
        int                     num_rx_desc;
Index: ixgbe.h
===================================================================
RCS file: /cvs/src/sys/dev/pci/ixgbe.h,v
retrieving revision 1.20
diff -u -p -r1.20 ixgbe.h
--- ixgbe.h     24 Nov 2015 17:11:40 -0000      1.20
+++ ixgbe.h     3 Dec 2015 16:50:26 -0000
@@ -52,6 +52,7 @@
 #include <sys/timeout.h>
 #include <sys/pool.h>
 #include <sys/rwlock.h>
+#include <sys/atomic.h>
 
 #include <net/if.h>
 #include <net/bpf.h>

Reply via email to