On Sat, Dec 05, 2015 at 06:11:51PM +0100, Jonathan Matthew wrote:
> The main interesting bit here is the txeof and start loops, which previously
> operated based on the prod/cons indices and the contents of the tx queue,
> but now just uses the indices as that's the only way to get a consistent view
> of the tx queue state.
> 
> At the moment I don't think the tx ring is big enough to use IFQ_DEQUEUE
> instead of ifq_deq_begin/commit, but maybe I'm wrong about that.

dlg@ pointed out that the way the driver modifies rl_flags would be unsafe with
this diff applied, since it sets and clears bits during interrupts and ioctl
handling with no locking.  The only change it makes during interrupts is to
flip RL_FLAG_TIMERINTR, which otherwise is only used during re_init and
re_stop, so this updated diff splits it out into a separate field.

ok?


Index: dev/ic/re.c
===================================================================
RCS file: /cvs/src/sys/dev/ic/re.c,v
retrieving revision 1.187
diff -u -p -r1.187 re.c
--- dev/ic/re.c 25 Nov 2015 03:09:58 -0000      1.187
+++ dev/ic/re.c 26 Dec 2015 03:43:35 -0000
@@ -120,6 +120,7 @@
 #include <sys/device.h>
 #include <sys/timeout.h>
 #include <sys/socket.h>
+#include <sys/atomic.h>
 
 #include <machine/bus.h>
 
@@ -151,7 +152,7 @@ int redebug = 0;
 
 static inline void re_set_bufaddr(struct rl_desc *, bus_addr_t);
 
-int    re_encap(struct rl_softc *, struct mbuf *, int *);
+int    re_encap(struct rl_softc *, struct mbuf *, struct rl_txq *, int *);
 
 int    re_newbuf(struct rl_softc *);
 int    re_rx_list_init(struct rl_softc *);
@@ -1448,18 +1449,14 @@ re_txeof(struct rl_softc *sc)
        struct ifnet    *ifp;
        struct rl_txq   *txq;
        uint32_t        txstat;
-       int             idx, descidx, tx = 0;
+       int             idx, descidx, tx_free, freed = 0;
 
        ifp = &sc->sc_arpcom.ac_if;
 
-       for (idx = sc->rl_ldata.rl_txq_considx;; idx = RL_NEXT_TXQ(sc, idx)) {
+       for (idx = sc->rl_ldata.rl_txq_considx;
+           idx != sc->rl_ldata.rl_txq_prodidx; idx = RL_NEXT_TXQ(sc, idx)) {
                txq = &sc->rl_ldata.rl_txq[idx];
 
-               if (txq->txq_mbuf == NULL) {
-                       KASSERT(idx == sc->rl_ldata.rl_txq_prodidx);
-                       break;
-               }
-
                descidx = txq->txq_descidx;
                RL_TXDESCSYNC(sc, descidx,
                    BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE);
@@ -1470,9 +1467,7 @@ re_txeof(struct rl_softc *sc)
                if (txstat & RL_TDESC_CMD_OWN)
                        break;
 
-               tx = 1;
-               sc->rl_ldata.rl_tx_free += txq->txq_nsegs;
-               KASSERT(sc->rl_ldata.rl_tx_free <= sc->rl_ldata.rl_tx_desc_cnt);
+               freed += txq->txq_nsegs;
                bus_dmamap_sync(sc->sc_dmat, txq->txq_dmamap,
                    0, txq->txq_dmamap->dm_mapsize, BUS_DMASYNC_POSTWRITE);
                bus_dmamap_unload(sc->sc_dmat, txq->txq_dmamap);
@@ -1487,9 +1482,13 @@ re_txeof(struct rl_softc *sc)
                        ifp->if_opackets++;
        }
 
-       sc->rl_ldata.rl_txq_considx = idx;
+       if (freed == 0)
+               return (0);
 
-       ifq_clr_oactive(&ifp->if_snd);
+       tx_free = atomic_add_int_nv(&sc->rl_ldata.rl_tx_free, freed);
+       KASSERT(tx_free <= sc->rl_ldata.rl_tx_desc_cnt);
+
+       sc->rl_ldata.rl_txq_considx = idx;
 
        /*
         * Some chips will ignore a second TX request issued while an
@@ -1498,12 +1497,14 @@ re_txeof(struct rl_softc *sc)
         * to restart the channel here to flush them out. This only
         * seems to be required with the PCIe devices.
         */
-       if (sc->rl_ldata.rl_tx_free < sc->rl_ldata.rl_tx_desc_cnt)
+       if (tx_free < sc->rl_ldata.rl_tx_desc_cnt)
                CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START);
-       else
+       else {
+               ifq_clr_oactive(&ifp->if_snd);
                ifp->if_timer = 0;
+       }
 
-       return (tx);
+       return (1);
 }
 
 void
@@ -1566,13 +1567,15 @@ re_intr(void *arg)
                }
 
                if (status & RL_ISR_SYSTEM_ERR) {
+                       KERNEL_LOCK();
                        re_init(ifp);
+                       KERNEL_UNLOCK();
                        claimed = 1;
                }
        }
 
        if (sc->rl_imtype == RL_IMTYPE_SIM) {
-               if ((sc->rl_flags & RL_FLAG_TIMERINTR)) {
+               if (sc->rl_timerintr) {
                        if ((tx | rx) == 0) {
                                /*
                                 * Nothing needs to be processed, fallback
@@ -1599,7 +1602,11 @@ re_intr(void *arg)
                }
        }
 
-       re_start(ifp);
+       if (!IFQ_IS_EMPTY(&ifp->if_snd)) {
+               KERNEL_LOCK();
+               re_start(ifp);
+               KERNEL_UNLOCK();
+       }
 
        CSR_WRITE_2(sc, RL_IMR, sc->rl_intrs);
 
@@ -1607,7 +1614,7 @@ re_intr(void *arg)
 }
 
 int
-re_encap(struct rl_softc *sc, struct mbuf *m, int *idx)
+re_encap(struct rl_softc *sc, struct mbuf *m, struct rl_txq *txq, int *used)
 {
        bus_dmamap_t    map;
        struct mbuf     *mp, mh;
@@ -1616,7 +1623,6 @@ re_encap(struct rl_softc *sc, struct mbu
        struct ip       *ip;
        struct rl_desc  *d;
        u_int32_t       cmdstat, vlanctl = 0, csum_flags = 0;
-       struct rl_txq   *txq;
 
        /*
         * Set up checksum offload. Note: checksum offload bits must
@@ -1669,7 +1675,6 @@ re_encap(struct rl_softc *sc, struct mbu
                }
        }
 
-       txq = &sc->rl_ldata.rl_txq[*idx];
        map = txq->txq_dmamap;
 
        error = bus_dmamap_load_mbuf(sc->sc_dmat, map, m,
@@ -1710,7 +1715,7 @@ re_encap(struct rl_softc *sc, struct mbu
                nsegs++;
        }
 
-       if (sc->rl_ldata.rl_tx_free - nsegs <= 1) {
+       if (sc->rl_ldata.rl_tx_free - (*used + nsegs) <= 1) {
                error = EFBIG;
                goto fail_unload;
        }
@@ -1812,10 +1817,9 @@ re_encap(struct rl_softc *sc, struct mbu
        txq->txq_descidx = lastidx;
        txq->txq_nsegs = nsegs;
 
-       sc->rl_ldata.rl_tx_free -= nsegs;
        sc->rl_ldata.rl_tx_nextfree = curidx;
 
-       *idx = RL_NEXT_TXQ(sc, *idx);
+       *used += nsegs;
 
        return (0);
 
@@ -1834,30 +1838,25 @@ re_start(struct ifnet *ifp)
 {
        struct rl_softc *sc = ifp->if_softc;
        struct mbuf     *m;
-       int             idx, queued = 0, error;
+       int             idx, used = 0, txq_free, error;
 
        if (!(ifp->if_flags & IFF_RUNNING) || ifq_is_oactive(&ifp->if_snd))
                return;
        if ((sc->rl_flags & RL_FLAG_LINK) == 0)
                return;
-       if (IFQ_IS_EMPTY(&ifp->if_snd))
-               return;
 
+       txq_free = sc->rl_ldata.rl_txq_considx;
        idx = sc->rl_ldata.rl_txq_prodidx;
+       if (idx >= txq_free)
+               txq_free += RL_TX_QLEN;
+       txq_free -= idx;
 
-       for (;;) {
+       while (txq_free > 1) {
                m = ifq_deq_begin(&ifp->if_snd);
                if (m == NULL)
                        break;
 
-               if (sc->rl_ldata.rl_txq[idx].txq_mbuf != NULL) {
-                       KASSERT(idx == sc->rl_ldata.rl_txq_considx);
-                       ifq_deq_rollback(&ifp->if_snd, m);
-                       ifq_set_oactive(&ifp->if_snd);
-                       break;
-               }
-
-               error = re_encap(sc, m, &idx);
+               error = re_encap(sc, m, &sc->rl_ldata.rl_txq[idx], &used);
                if (error != 0 && error != ENOBUFS) {
                        ifq_deq_rollback(&ifp->if_snd, m);
                        ifq_set_oactive(&ifp->if_snd);
@@ -1869,31 +1868,25 @@ re_start(struct ifnet *ifp)
                        continue;
                }
 
-               /* now we are committed to transmit the packet */
                ifq_deq_commit(&ifp->if_snd, m);
-               queued++;
 
 #if NBPFILTER > 0
-               /*
-                * If there's a BPF listener, bounce a copy of this frame
-                * to him.
-                */
                if (ifp->if_bpf)
                        bpf_mtap_ether(ifp->if_bpf, m, BPF_DIRECTION_OUT);
 #endif
+               idx = RL_NEXT_TXQ(sc, idx);
+               txq_free--;
        }
 
-       if (queued == 0)
+       if (used == 0)
                return;
-
+       
+       ifp->if_timer = 5;
        sc->rl_ldata.rl_txq_prodidx = idx;
+       atomic_sub_int(&sc->rl_ldata.rl_tx_free, used);
+       KASSERT(sc->rl_ldata.rl_tx_free >= 0);
 
        CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START);
-
-       /*
-        * Set a timeout in case the chip goes out to lunch.
-        */
-       ifp->if_timer = 5;
 }
 
 int
@@ -2142,14 +2135,13 @@ re_stop(struct ifnet *ifp)
        sc = ifp->if_softc;
 
        ifp->if_timer = 0;
-       sc->rl_flags &= ~(RL_FLAG_LINK|RL_FLAG_TIMERINTR);
+       sc->rl_flags &= ~RL_FLAG_LINK;
+       sc->rl_timerintr = 0;
 
        timeout_del(&sc->timer_handle);
        ifp->if_flags &= ~IFF_RUNNING;
        ifq_clr_oactive(&ifp->if_snd);
 
-       mii_down(&sc->sc_mii);
-
        /*
         * Disable accepting frames to put RX MAC into idle state.
         * Otherwise it's possible to get frames while stop command
@@ -2196,6 +2188,10 @@ re_stop(struct ifnet *ifp)
                sc->rl_head = sc->rl_tail = NULL;
        }
 
+       intr_barrier(sc->sc_ih);
+
+       mii_down(&sc->sc_mii);
+
        /* Free the TX list buffers. */
        for (i = 0; i < RL_TX_QLEN; i++) {
                if (sc->rl_ldata.rl_txq[i].txq_mbuf != NULL) {
@@ -2276,7 +2272,7 @@ re_setup_sim_im(struct rl_softc *sc)
                CSR_WRITE_4(sc, RL_TIMERINT_8169, ticks);
        }
        CSR_WRITE_4(sc, RL_TIMERCNT, 1); /* reload */
-       sc->rl_flags |= RL_FLAG_TIMERINTR;
+       sc->rl_timerintr = 1;
 }
 
 void
@@ -2286,7 +2282,7 @@ re_disable_sim_im(struct rl_softc *sc)
                CSR_WRITE_4(sc, RL_TIMERINT, 0);
        else 
                CSR_WRITE_4(sc, RL_TIMERINT_8169, 0);
-       sc->rl_flags &= ~RL_FLAG_TIMERINTR;
+       sc->rl_timerintr = 0;
 }
 
 void
Index: dev/ic/rtl81x9reg.h
===================================================================
RCS file: /cvs/src/sys/dev/ic/rtl81x9reg.h,v
retrieving revision 1.96
diff -u -p -r1.96 rtl81x9reg.h
--- dev/ic/rtl81x9reg.h 2 Nov 2015 00:08:50 -0000       1.96
+++ dev/ic/rtl81x9reg.h 26 Dec 2015 03:43:35 -0000
@@ -918,6 +918,7 @@ struct rl_softc {
 #define        RL_IMTYPE_NONE          0
 #define        RL_IMTYPE_SIM           1       /* simulated */
 #define        RL_IMTYPE_HW            2       /* hardware based */
+       int                     rl_timerintr;
 };
 
 /*
Index: dev/pci/if_re_pci.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/if_re_pci.c,v
retrieving revision 1.49
diff -u -p -r1.49 if_re_pci.c
--- dev/pci/if_re_pci.c 24 Nov 2015 17:11:39 -0000      1.49
+++ dev/pci/if_re_pci.c 26 Dec 2015 03:43:35 -0000
@@ -156,8 +156,8 @@ re_pci_attach(struct device *parent, str
                return;
        }
        intrstr = pci_intr_string(pc, ih);
-       psc->sc_ih = pci_intr_establish(pc, ih, IPL_NET, re_intr, sc,
-           sc->sc_dev.dv_xname);
+       psc->sc_ih = pci_intr_establish(pc, ih, IPL_NET | IPL_MPSAFE, re_intr,
+           sc, sc->sc_dev.dv_xname);
        if (psc->sc_ih == NULL) {
                printf(": couldn't establish interrupt");
                if (intrstr != NULL)

Reply via email to