> On 26 Dec 2015, at 5:49 PM, Jonathan Matthew <jonat...@d14n.org> wrote: > > On Sat, Dec 05, 2015 at 06:11:51PM +0100, Jonathan Matthew wrote: >> The main interesting bit here is the txeof and start loops, which previously >> operated based on the prod/cons indices and the contents of the tx queue, >> but now just uses the indices as that's the only way to get a consistent view >> of the tx queue state. >> >> At the moment I don't think the tx ring is big enough to use IFQ_DEQUEUE >> instead of ifq_deq_begin/commit, but maybe I'm wrong about that. > > dlg@ pointed out that the way the driver modifies rl_flags would be unsafe > with > this diff applied, since it sets and clears bits during interrupts and ioctl > handling with no locking. The only change it makes during interrupts is to > flip RL_FLAG_TIMERINTR, which otherwise is only used during re_init and > re_stop, so this updated diff splits it out into a separate field. > > ok?
im ok with this diff. tested on RTL8168E/8111E. dlg > > > Index: dev/ic/re.c > =================================================================== > RCS file: /cvs/src/sys/dev/ic/re.c,v > retrieving revision 1.187 > diff -u -p -r1.187 re.c > --- dev/ic/re.c 25 Nov 2015 03:09:58 -0000 1.187 > +++ dev/ic/re.c 26 Dec 2015 03:43:35 -0000 > @@ -120,6 +120,7 @@ > #include <sys/device.h> > #include <sys/timeout.h> > #include <sys/socket.h> > +#include <sys/atomic.h> > > #include <machine/bus.h> > > @@ -151,7 +152,7 @@ int redebug = 0; > > static inline void re_set_bufaddr(struct rl_desc *, bus_addr_t); > > -int re_encap(struct rl_softc *, struct mbuf *, int *); > +int re_encap(struct rl_softc *, struct mbuf *, struct rl_txq *, int *); > > int re_newbuf(struct rl_softc *); > int re_rx_list_init(struct rl_softc *); > @@ -1448,18 +1449,14 @@ re_txeof(struct rl_softc *sc) > struct ifnet *ifp; > struct rl_txq *txq; > uint32_t txstat; > - int idx, descidx, tx = 0; > + int idx, descidx, tx_free, freed = 0; > > ifp = &sc->sc_arpcom.ac_if; > > - for (idx = sc->rl_ldata.rl_txq_considx;; idx = RL_NEXT_TXQ(sc, idx)) { > + for (idx = sc->rl_ldata.rl_txq_considx; > + idx != sc->rl_ldata.rl_txq_prodidx; idx = RL_NEXT_TXQ(sc, idx)) { > txq = &sc->rl_ldata.rl_txq[idx]; > > - if (txq->txq_mbuf == NULL) { > - KASSERT(idx == sc->rl_ldata.rl_txq_prodidx); > - break; > - } > - > descidx = txq->txq_descidx; > RL_TXDESCSYNC(sc, descidx, > BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE); > @@ -1470,9 +1467,7 @@ re_txeof(struct rl_softc *sc) > if (txstat & RL_TDESC_CMD_OWN) > break; > > - tx = 1; > - sc->rl_ldata.rl_tx_free += txq->txq_nsegs; > - KASSERT(sc->rl_ldata.rl_tx_free <= sc->rl_ldata.rl_tx_desc_cnt); > + freed += txq->txq_nsegs; > bus_dmamap_sync(sc->sc_dmat, txq->txq_dmamap, > 0, txq->txq_dmamap->dm_mapsize, BUS_DMASYNC_POSTWRITE); > bus_dmamap_unload(sc->sc_dmat, txq->txq_dmamap); > @@ -1487,9 +1482,13 @@ re_txeof(struct rl_softc *sc) > ifp->if_opackets++; > } > > - sc->rl_ldata.rl_txq_considx = idx; > + if (freed == 0) > + return (0); > > - ifq_clr_oactive(&ifp->if_snd); > + tx_free = atomic_add_int_nv(&sc->rl_ldata.rl_tx_free, freed); > + KASSERT(tx_free <= sc->rl_ldata.rl_tx_desc_cnt); > + > + sc->rl_ldata.rl_txq_considx = idx; > > /* > * Some chips will ignore a second TX request issued while an > @@ -1498,12 +1497,14 @@ re_txeof(struct rl_softc *sc) > * to restart the channel here to flush them out. This only > * seems to be required with the PCIe devices. > */ > - if (sc->rl_ldata.rl_tx_free < sc->rl_ldata.rl_tx_desc_cnt) > + if (tx_free < sc->rl_ldata.rl_tx_desc_cnt) > CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START); > - else > + else { > + ifq_clr_oactive(&ifp->if_snd); > ifp->if_timer = 0; > + } > > - return (tx); > + return (1); > } > > void > @@ -1566,13 +1567,15 @@ re_intr(void *arg) > } > > if (status & RL_ISR_SYSTEM_ERR) { > + KERNEL_LOCK(); > re_init(ifp); > + KERNEL_UNLOCK(); > claimed = 1; > } > } > > if (sc->rl_imtype == RL_IMTYPE_SIM) { > - if ((sc->rl_flags & RL_FLAG_TIMERINTR)) { > + if (sc->rl_timerintr) { > if ((tx | rx) == 0) { > /* > * Nothing needs to be processed, fallback > @@ -1599,7 +1602,11 @@ re_intr(void *arg) > } > } > > - re_start(ifp); > + if (!IFQ_IS_EMPTY(&ifp->if_snd)) { > + KERNEL_LOCK(); > + re_start(ifp); > + KERNEL_UNLOCK(); > + } > > CSR_WRITE_2(sc, RL_IMR, sc->rl_intrs); > > @@ -1607,7 +1614,7 @@ re_intr(void *arg) > } > > int > -re_encap(struct rl_softc *sc, struct mbuf *m, int *idx) > +re_encap(struct rl_softc *sc, struct mbuf *m, struct rl_txq *txq, int *used) > { > bus_dmamap_t map; > struct mbuf *mp, mh; > @@ -1616,7 +1623,6 @@ re_encap(struct rl_softc *sc, struct mbu > struct ip *ip; > struct rl_desc *d; > u_int32_t cmdstat, vlanctl = 0, csum_flags = 0; > - struct rl_txq *txq; > > /* > * Set up checksum offload. Note: checksum offload bits must > @@ -1669,7 +1675,6 @@ re_encap(struct rl_softc *sc, struct mbu > } > } > > - txq = &sc->rl_ldata.rl_txq[*idx]; > map = txq->txq_dmamap; > > error = bus_dmamap_load_mbuf(sc->sc_dmat, map, m, > @@ -1710,7 +1715,7 @@ re_encap(struct rl_softc *sc, struct mbu > nsegs++; > } > > - if (sc->rl_ldata.rl_tx_free - nsegs <= 1) { > + if (sc->rl_ldata.rl_tx_free - (*used + nsegs) <= 1) { > error = EFBIG; > goto fail_unload; > } > @@ -1812,10 +1817,9 @@ re_encap(struct rl_softc *sc, struct mbu > txq->txq_descidx = lastidx; > txq->txq_nsegs = nsegs; > > - sc->rl_ldata.rl_tx_free -= nsegs; > sc->rl_ldata.rl_tx_nextfree = curidx; > > - *idx = RL_NEXT_TXQ(sc, *idx); > + *used += nsegs; > > return (0); > > @@ -1834,30 +1838,25 @@ re_start(struct ifnet *ifp) > { > struct rl_softc *sc = ifp->if_softc; > struct mbuf *m; > - int idx, queued = 0, error; > + int idx, used = 0, txq_free, error; > > if (!(ifp->if_flags & IFF_RUNNING) || ifq_is_oactive(&ifp->if_snd)) > return; > if ((sc->rl_flags & RL_FLAG_LINK) == 0) > return; > - if (IFQ_IS_EMPTY(&ifp->if_snd)) > - return; > > + txq_free = sc->rl_ldata.rl_txq_considx; > idx = sc->rl_ldata.rl_txq_prodidx; > + if (idx >= txq_free) > + txq_free += RL_TX_QLEN; > + txq_free -= idx; > > - for (;;) { > + while (txq_free > 1) { > m = ifq_deq_begin(&ifp->if_snd); > if (m == NULL) > break; > > - if (sc->rl_ldata.rl_txq[idx].txq_mbuf != NULL) { > - KASSERT(idx == sc->rl_ldata.rl_txq_considx); > - ifq_deq_rollback(&ifp->if_snd, m); > - ifq_set_oactive(&ifp->if_snd); > - break; > - } > - > - error = re_encap(sc, m, &idx); > + error = re_encap(sc, m, &sc->rl_ldata.rl_txq[idx], &used); > if (error != 0 && error != ENOBUFS) { > ifq_deq_rollback(&ifp->if_snd, m); > ifq_set_oactive(&ifp->if_snd); > @@ -1869,31 +1868,25 @@ re_start(struct ifnet *ifp) > continue; > } > > - /* now we are committed to transmit the packet */ > ifq_deq_commit(&ifp->if_snd, m); > - queued++; > > #if NBPFILTER > 0 > - /* > - * If there's a BPF listener, bounce a copy of this frame > - * to him. > - */ > if (ifp->if_bpf) > bpf_mtap_ether(ifp->if_bpf, m, BPF_DIRECTION_OUT); > #endif > + idx = RL_NEXT_TXQ(sc, idx); > + txq_free--; > } > > - if (queued == 0) > + if (used == 0) > return; > - > + > + ifp->if_timer = 5; > sc->rl_ldata.rl_txq_prodidx = idx; > + atomic_sub_int(&sc->rl_ldata.rl_tx_free, used); > + KASSERT(sc->rl_ldata.rl_tx_free >= 0); > > CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START); > - > - /* > - * Set a timeout in case the chip goes out to lunch. > - */ > - ifp->if_timer = 5; > } > > int > @@ -2142,14 +2135,13 @@ re_stop(struct ifnet *ifp) > sc = ifp->if_softc; > > ifp->if_timer = 0; > - sc->rl_flags &= ~(RL_FLAG_LINK|RL_FLAG_TIMERINTR); > + sc->rl_flags &= ~RL_FLAG_LINK; > + sc->rl_timerintr = 0; > > timeout_del(&sc->timer_handle); > ifp->if_flags &= ~IFF_RUNNING; > ifq_clr_oactive(&ifp->if_snd); > > - mii_down(&sc->sc_mii); > - > /* > * Disable accepting frames to put RX MAC into idle state. > * Otherwise it's possible to get frames while stop command > @@ -2196,6 +2188,10 @@ re_stop(struct ifnet *ifp) > sc->rl_head = sc->rl_tail = NULL; > } > > + intr_barrier(sc->sc_ih); > + > + mii_down(&sc->sc_mii); > + > /* Free the TX list buffers. */ > for (i = 0; i < RL_TX_QLEN; i++) { > if (sc->rl_ldata.rl_txq[i].txq_mbuf != NULL) { > @@ -2276,7 +2272,7 @@ re_setup_sim_im(struct rl_softc *sc) > CSR_WRITE_4(sc, RL_TIMERINT_8169, ticks); > } > CSR_WRITE_4(sc, RL_TIMERCNT, 1); /* reload */ > - sc->rl_flags |= RL_FLAG_TIMERINTR; > + sc->rl_timerintr = 1; > } > > void > @@ -2286,7 +2282,7 @@ re_disable_sim_im(struct rl_softc *sc) > CSR_WRITE_4(sc, RL_TIMERINT, 0); > else > CSR_WRITE_4(sc, RL_TIMERINT_8169, 0); > - sc->rl_flags &= ~RL_FLAG_TIMERINTR; > + sc->rl_timerintr = 0; > } > > void > Index: dev/ic/rtl81x9reg.h > =================================================================== > RCS file: /cvs/src/sys/dev/ic/rtl81x9reg.h,v > retrieving revision 1.96 > diff -u -p -r1.96 rtl81x9reg.h > --- dev/ic/rtl81x9reg.h 2 Nov 2015 00:08:50 -0000 1.96 > +++ dev/ic/rtl81x9reg.h 26 Dec 2015 03:43:35 -0000 > @@ -918,6 +918,7 @@ struct rl_softc { > #define RL_IMTYPE_NONE 0 > #define RL_IMTYPE_SIM 1 /* simulated */ > #define RL_IMTYPE_HW 2 /* hardware based */ > + int rl_timerintr; > }; > > /* > Index: dev/pci/if_re_pci.c > =================================================================== > RCS file: /cvs/src/sys/dev/pci/if_re_pci.c,v > retrieving revision 1.49 > diff -u -p -r1.49 if_re_pci.c > --- dev/pci/if_re_pci.c 24 Nov 2015 17:11:39 -0000 1.49 > +++ dev/pci/if_re_pci.c 26 Dec 2015 03:43:35 -0000 > @@ -156,8 +156,8 @@ re_pci_attach(struct device *parent, str > return; > } > intrstr = pci_intr_string(pc, ih); > - psc->sc_ih = pci_intr_establish(pc, ih, IPL_NET, re_intr, sc, > - sc->sc_dev.dv_xname); > + psc->sc_ih = pci_intr_establish(pc, ih, IPL_NET | IPL_MPSAFE, re_intr, > + sc, sc->sc_dev.dv_xname); > if (psc->sc_ih == NULL) { > printf(": couldn't establish interrupt"); > if (intrstr != NULL) >