On Sat, Dec 05, 2015 at 06:11:51PM +0100, Jonathan Matthew wrote: > The main interesting bit here is the txeof and start loops, which previously > operated based on the prod/cons indices and the contents of the tx queue, > but now just uses the indices as that's the only way to get a consistent view > of the tx queue state. > > At the moment I don't think the tx ring is big enough to use IFQ_DEQUEUE > instead of ifq_deq_begin/commit, but maybe I'm wrong about that.
dlg@ pointed out that the way the driver modifies rl_flags would be unsafe with this diff applied, since it sets and clears bits during interrupts and ioctl handling with no locking. The only change it makes during interrupts is to flip RL_FLAG_TIMERINTR, which otherwise is only used during re_init and re_stop, so this updated diff splits it out into a separate field. ok? Index: dev/ic/re.c =================================================================== RCS file: /cvs/src/sys/dev/ic/re.c,v retrieving revision 1.187 diff -u -p -r1.187 re.c --- dev/ic/re.c 25 Nov 2015 03:09:58 -0000 1.187 +++ dev/ic/re.c 26 Dec 2015 03:43:35 -0000 @@ -120,6 +120,7 @@ #include <sys/device.h> #include <sys/timeout.h> #include <sys/socket.h> +#include <sys/atomic.h> #include <machine/bus.h> @@ -151,7 +152,7 @@ int redebug = 0; static inline void re_set_bufaddr(struct rl_desc *, bus_addr_t); -int re_encap(struct rl_softc *, struct mbuf *, int *); +int re_encap(struct rl_softc *, struct mbuf *, struct rl_txq *, int *); int re_newbuf(struct rl_softc *); int re_rx_list_init(struct rl_softc *); @@ -1448,18 +1449,14 @@ re_txeof(struct rl_softc *sc) struct ifnet *ifp; struct rl_txq *txq; uint32_t txstat; - int idx, descidx, tx = 0; + int idx, descidx, tx_free, freed = 0; ifp = &sc->sc_arpcom.ac_if; - for (idx = sc->rl_ldata.rl_txq_considx;; idx = RL_NEXT_TXQ(sc, idx)) { + for (idx = sc->rl_ldata.rl_txq_considx; + idx != sc->rl_ldata.rl_txq_prodidx; idx = RL_NEXT_TXQ(sc, idx)) { txq = &sc->rl_ldata.rl_txq[idx]; - if (txq->txq_mbuf == NULL) { - KASSERT(idx == sc->rl_ldata.rl_txq_prodidx); - break; - } - descidx = txq->txq_descidx; RL_TXDESCSYNC(sc, descidx, BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE); @@ -1470,9 +1467,7 @@ re_txeof(struct rl_softc *sc) if (txstat & RL_TDESC_CMD_OWN) break; - tx = 1; - sc->rl_ldata.rl_tx_free += txq->txq_nsegs; - KASSERT(sc->rl_ldata.rl_tx_free <= sc->rl_ldata.rl_tx_desc_cnt); + freed += txq->txq_nsegs; bus_dmamap_sync(sc->sc_dmat, txq->txq_dmamap, 0, txq->txq_dmamap->dm_mapsize, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->sc_dmat, txq->txq_dmamap); @@ -1487,9 +1482,13 @@ re_txeof(struct rl_softc *sc) ifp->if_opackets++; } - sc->rl_ldata.rl_txq_considx = idx; + if (freed == 0) + return (0); - ifq_clr_oactive(&ifp->if_snd); + tx_free = atomic_add_int_nv(&sc->rl_ldata.rl_tx_free, freed); + KASSERT(tx_free <= sc->rl_ldata.rl_tx_desc_cnt); + + sc->rl_ldata.rl_txq_considx = idx; /* * Some chips will ignore a second TX request issued while an @@ -1498,12 +1497,14 @@ re_txeof(struct rl_softc *sc) * to restart the channel here to flush them out. This only * seems to be required with the PCIe devices. */ - if (sc->rl_ldata.rl_tx_free < sc->rl_ldata.rl_tx_desc_cnt) + if (tx_free < sc->rl_ldata.rl_tx_desc_cnt) CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START); - else + else { + ifq_clr_oactive(&ifp->if_snd); ifp->if_timer = 0; + } - return (tx); + return (1); } void @@ -1566,13 +1567,15 @@ re_intr(void *arg) } if (status & RL_ISR_SYSTEM_ERR) { + KERNEL_LOCK(); re_init(ifp); + KERNEL_UNLOCK(); claimed = 1; } } if (sc->rl_imtype == RL_IMTYPE_SIM) { - if ((sc->rl_flags & RL_FLAG_TIMERINTR)) { + if (sc->rl_timerintr) { if ((tx | rx) == 0) { /* * Nothing needs to be processed, fallback @@ -1599,7 +1602,11 @@ re_intr(void *arg) } } - re_start(ifp); + if (!IFQ_IS_EMPTY(&ifp->if_snd)) { + KERNEL_LOCK(); + re_start(ifp); + KERNEL_UNLOCK(); + } CSR_WRITE_2(sc, RL_IMR, sc->rl_intrs); @@ -1607,7 +1614,7 @@ re_intr(void *arg) } int -re_encap(struct rl_softc *sc, struct mbuf *m, int *idx) +re_encap(struct rl_softc *sc, struct mbuf *m, struct rl_txq *txq, int *used) { bus_dmamap_t map; struct mbuf *mp, mh; @@ -1616,7 +1623,6 @@ re_encap(struct rl_softc *sc, struct mbu struct ip *ip; struct rl_desc *d; u_int32_t cmdstat, vlanctl = 0, csum_flags = 0; - struct rl_txq *txq; /* * Set up checksum offload. Note: checksum offload bits must @@ -1669,7 +1675,6 @@ re_encap(struct rl_softc *sc, struct mbu } } - txq = &sc->rl_ldata.rl_txq[*idx]; map = txq->txq_dmamap; error = bus_dmamap_load_mbuf(sc->sc_dmat, map, m, @@ -1710,7 +1715,7 @@ re_encap(struct rl_softc *sc, struct mbu nsegs++; } - if (sc->rl_ldata.rl_tx_free - nsegs <= 1) { + if (sc->rl_ldata.rl_tx_free - (*used + nsegs) <= 1) { error = EFBIG; goto fail_unload; } @@ -1812,10 +1817,9 @@ re_encap(struct rl_softc *sc, struct mbu txq->txq_descidx = lastidx; txq->txq_nsegs = nsegs; - sc->rl_ldata.rl_tx_free -= nsegs; sc->rl_ldata.rl_tx_nextfree = curidx; - *idx = RL_NEXT_TXQ(sc, *idx); + *used += nsegs; return (0); @@ -1834,30 +1838,25 @@ re_start(struct ifnet *ifp) { struct rl_softc *sc = ifp->if_softc; struct mbuf *m; - int idx, queued = 0, error; + int idx, used = 0, txq_free, error; if (!(ifp->if_flags & IFF_RUNNING) || ifq_is_oactive(&ifp->if_snd)) return; if ((sc->rl_flags & RL_FLAG_LINK) == 0) return; - if (IFQ_IS_EMPTY(&ifp->if_snd)) - return; + txq_free = sc->rl_ldata.rl_txq_considx; idx = sc->rl_ldata.rl_txq_prodidx; + if (idx >= txq_free) + txq_free += RL_TX_QLEN; + txq_free -= idx; - for (;;) { + while (txq_free > 1) { m = ifq_deq_begin(&ifp->if_snd); if (m == NULL) break; - if (sc->rl_ldata.rl_txq[idx].txq_mbuf != NULL) { - KASSERT(idx == sc->rl_ldata.rl_txq_considx); - ifq_deq_rollback(&ifp->if_snd, m); - ifq_set_oactive(&ifp->if_snd); - break; - } - - error = re_encap(sc, m, &idx); + error = re_encap(sc, m, &sc->rl_ldata.rl_txq[idx], &used); if (error != 0 && error != ENOBUFS) { ifq_deq_rollback(&ifp->if_snd, m); ifq_set_oactive(&ifp->if_snd); @@ -1869,31 +1868,25 @@ re_start(struct ifnet *ifp) continue; } - /* now we are committed to transmit the packet */ ifq_deq_commit(&ifp->if_snd, m); - queued++; #if NBPFILTER > 0 - /* - * If there's a BPF listener, bounce a copy of this frame - * to him. - */ if (ifp->if_bpf) bpf_mtap_ether(ifp->if_bpf, m, BPF_DIRECTION_OUT); #endif + idx = RL_NEXT_TXQ(sc, idx); + txq_free--; } - if (queued == 0) + if (used == 0) return; - + + ifp->if_timer = 5; sc->rl_ldata.rl_txq_prodidx = idx; + atomic_sub_int(&sc->rl_ldata.rl_tx_free, used); + KASSERT(sc->rl_ldata.rl_tx_free >= 0); CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START); - - /* - * Set a timeout in case the chip goes out to lunch. - */ - ifp->if_timer = 5; } int @@ -2142,14 +2135,13 @@ re_stop(struct ifnet *ifp) sc = ifp->if_softc; ifp->if_timer = 0; - sc->rl_flags &= ~(RL_FLAG_LINK|RL_FLAG_TIMERINTR); + sc->rl_flags &= ~RL_FLAG_LINK; + sc->rl_timerintr = 0; timeout_del(&sc->timer_handle); ifp->if_flags &= ~IFF_RUNNING; ifq_clr_oactive(&ifp->if_snd); - mii_down(&sc->sc_mii); - /* * Disable accepting frames to put RX MAC into idle state. * Otherwise it's possible to get frames while stop command @@ -2196,6 +2188,10 @@ re_stop(struct ifnet *ifp) sc->rl_head = sc->rl_tail = NULL; } + intr_barrier(sc->sc_ih); + + mii_down(&sc->sc_mii); + /* Free the TX list buffers. */ for (i = 0; i < RL_TX_QLEN; i++) { if (sc->rl_ldata.rl_txq[i].txq_mbuf != NULL) { @@ -2276,7 +2272,7 @@ re_setup_sim_im(struct rl_softc *sc) CSR_WRITE_4(sc, RL_TIMERINT_8169, ticks); } CSR_WRITE_4(sc, RL_TIMERCNT, 1); /* reload */ - sc->rl_flags |= RL_FLAG_TIMERINTR; + sc->rl_timerintr = 1; } void @@ -2286,7 +2282,7 @@ re_disable_sim_im(struct rl_softc *sc) CSR_WRITE_4(sc, RL_TIMERINT, 0); else CSR_WRITE_4(sc, RL_TIMERINT_8169, 0); - sc->rl_flags &= ~RL_FLAG_TIMERINTR; + sc->rl_timerintr = 0; } void Index: dev/ic/rtl81x9reg.h =================================================================== RCS file: /cvs/src/sys/dev/ic/rtl81x9reg.h,v retrieving revision 1.96 diff -u -p -r1.96 rtl81x9reg.h --- dev/ic/rtl81x9reg.h 2 Nov 2015 00:08:50 -0000 1.96 +++ dev/ic/rtl81x9reg.h 26 Dec 2015 03:43:35 -0000 @@ -918,6 +918,7 @@ struct rl_softc { #define RL_IMTYPE_NONE 0 #define RL_IMTYPE_SIM 1 /* simulated */ #define RL_IMTYPE_HW 2 /* hardware based */ + int rl_timerintr; }; /* Index: dev/pci/if_re_pci.c =================================================================== RCS file: /cvs/src/sys/dev/pci/if_re_pci.c,v retrieving revision 1.49 diff -u -p -r1.49 if_re_pci.c --- dev/pci/if_re_pci.c 24 Nov 2015 17:11:39 -0000 1.49 +++ dev/pci/if_re_pci.c 26 Dec 2015 03:43:35 -0000 @@ -156,8 +156,8 @@ re_pci_attach(struct device *parent, str return; } intrstr = pci_intr_string(pc, ih); - psc->sc_ih = pci_intr_establish(pc, ih, IPL_NET, re_intr, sc, - sc->sc_dev.dv_xname); + psc->sc_ih = pci_intr_establish(pc, ih, IPL_NET | IPL_MPSAFE, re_intr, + sc, sc->sc_dev.dv_xname); if (psc->sc_ih == NULL) { printf(": couldn't establish interrupt"); if (intrstr != NULL)