The main interesting bit here is the txeof and start loops, which previously operated based on the prod/cons indices and the contents of the tx queue, but now just uses the indices as that's the only way to get a consistent view of the tx queue state.
At the moment I don't think the tx ring is big enough to use IFQ_DEQUEUE instead of ifq_deq_begin/commit, but maybe I'm wrong about that. can someone try this on an APU1? Index: ic/re.c =================================================================== RCS file: /cvs/src/sys/dev/ic/re.c,v retrieving revision 1.187 diff -u -p -r1.187 re.c --- ic/re.c 25 Nov 2015 03:09:58 -0000 1.187 +++ ic/re.c 4 Dec 2015 15:07:02 -0000 @@ -120,6 +120,7 @@ #include <sys/device.h> #include <sys/timeout.h> #include <sys/socket.h> +#include <sys/atomic.h> #include <machine/bus.h> @@ -151,7 +152,7 @@ int redebug = 0; static inline void re_set_bufaddr(struct rl_desc *, bus_addr_t); -int re_encap(struct rl_softc *, struct mbuf *, int *); +int re_encap(struct rl_softc *, struct mbuf *, struct rl_txq *, int *); int re_newbuf(struct rl_softc *); int re_rx_list_init(struct rl_softc *); @@ -1448,18 +1449,14 @@ re_txeof(struct rl_softc *sc) struct ifnet *ifp; struct rl_txq *txq; uint32_t txstat; - int idx, descidx, tx = 0; + int idx, descidx, tx_free, freed = 0; ifp = &sc->sc_arpcom.ac_if; - for (idx = sc->rl_ldata.rl_txq_considx;; idx = RL_NEXT_TXQ(sc, idx)) { + for (idx = sc->rl_ldata.rl_txq_considx; + idx != sc->rl_ldata.rl_txq_prodidx; idx = RL_NEXT_TXQ(sc, idx)) { txq = &sc->rl_ldata.rl_txq[idx]; - if (txq->txq_mbuf == NULL) { - KASSERT(idx == sc->rl_ldata.rl_txq_prodidx); - break; - } - descidx = txq->txq_descidx; RL_TXDESCSYNC(sc, descidx, BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE); @@ -1470,9 +1467,7 @@ re_txeof(struct rl_softc *sc) if (txstat & RL_TDESC_CMD_OWN) break; - tx = 1; - sc->rl_ldata.rl_tx_free += txq->txq_nsegs; - KASSERT(sc->rl_ldata.rl_tx_free <= sc->rl_ldata.rl_tx_desc_cnt); + freed += txq->txq_nsegs; bus_dmamap_sync(sc->sc_dmat, txq->txq_dmamap, 0, txq->txq_dmamap->dm_mapsize, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->sc_dmat, txq->txq_dmamap); @@ -1487,9 +1482,13 @@ re_txeof(struct rl_softc *sc) ifp->if_opackets++; } - sc->rl_ldata.rl_txq_considx = idx; + if (freed == 0) + return (0); - ifq_clr_oactive(&ifp->if_snd); + tx_free = atomic_add_int_nv(&sc->rl_ldata.rl_tx_free, freed); + KASSERT(tx_free <= sc->rl_ldata.rl_tx_desc_cnt); + + sc->rl_ldata.rl_txq_considx = idx; /* * Some chips will ignore a second TX request issued while an @@ -1498,12 +1497,14 @@ re_txeof(struct rl_softc *sc) * to restart the channel here to flush them out. This only * seems to be required with the PCIe devices. */ - if (sc->rl_ldata.rl_tx_free < sc->rl_ldata.rl_tx_desc_cnt) + if (tx_free < sc->rl_ldata.rl_tx_desc_cnt) CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START); - else + else { + ifq_clr_oactive(&ifp->if_snd); ifp->if_timer = 0; + } - return (tx); + return (1); } void @@ -1566,7 +1567,9 @@ re_intr(void *arg) } if (status & RL_ISR_SYSTEM_ERR) { + KERNEL_LOCK(); re_init(ifp); + KERNEL_UNLOCK(); claimed = 1; } } @@ -1599,7 +1602,11 @@ re_intr(void *arg) } } - re_start(ifp); + if (!IFQ_IS_EMPTY(&ifp->if_snd)) { + KERNEL_LOCK(); + re_start(ifp); + KERNEL_UNLOCK(); + } CSR_WRITE_2(sc, RL_IMR, sc->rl_intrs); @@ -1607,7 +1614,7 @@ re_intr(void *arg) } int -re_encap(struct rl_softc *sc, struct mbuf *m, int *idx) +re_encap(struct rl_softc *sc, struct mbuf *m, struct rl_txq *txq, int *used) { bus_dmamap_t map; struct mbuf *mp, mh; @@ -1616,7 +1623,6 @@ re_encap(struct rl_softc *sc, struct mbu struct ip *ip; struct rl_desc *d; u_int32_t cmdstat, vlanctl = 0, csum_flags = 0; - struct rl_txq *txq; /* * Set up checksum offload. Note: checksum offload bits must @@ -1669,7 +1675,6 @@ re_encap(struct rl_softc *sc, struct mbu } } - txq = &sc->rl_ldata.rl_txq[*idx]; map = txq->txq_dmamap; error = bus_dmamap_load_mbuf(sc->sc_dmat, map, m, @@ -1710,7 +1715,7 @@ re_encap(struct rl_softc *sc, struct mbu nsegs++; } - if (sc->rl_ldata.rl_tx_free - nsegs <= 1) { + if (sc->rl_ldata.rl_tx_free - (*used + nsegs) <= 1) { error = EFBIG; goto fail_unload; } @@ -1812,10 +1817,9 @@ re_encap(struct rl_softc *sc, struct mbu txq->txq_descidx = lastidx; txq->txq_nsegs = nsegs; - sc->rl_ldata.rl_tx_free -= nsegs; sc->rl_ldata.rl_tx_nextfree = curidx; - *idx = RL_NEXT_TXQ(sc, *idx); + *used += nsegs; return (0); @@ -1834,30 +1838,25 @@ re_start(struct ifnet *ifp) { struct rl_softc *sc = ifp->if_softc; struct mbuf *m; - int idx, queued = 0, error; + int idx, used = 0, txq_free, error; if (!(ifp->if_flags & IFF_RUNNING) || ifq_is_oactive(&ifp->if_snd)) return; if ((sc->rl_flags & RL_FLAG_LINK) == 0) return; - if (IFQ_IS_EMPTY(&ifp->if_snd)) - return; + txq_free = sc->rl_ldata.rl_txq_considx; idx = sc->rl_ldata.rl_txq_prodidx; + if (idx >= txq_free) + txq_free += RL_TX_QLEN; + txq_free -= idx; - for (;;) { + while (txq_free > 1) { m = ifq_deq_begin(&ifp->if_snd); if (m == NULL) break; - if (sc->rl_ldata.rl_txq[idx].txq_mbuf != NULL) { - KASSERT(idx == sc->rl_ldata.rl_txq_considx); - ifq_deq_rollback(&ifp->if_snd, m); - ifq_set_oactive(&ifp->if_snd); - break; - } - - error = re_encap(sc, m, &idx); + error = re_encap(sc, m, &sc->rl_ldata.rl_txq[idx], &used); if (error != 0 && error != ENOBUFS) { ifq_deq_rollback(&ifp->if_snd, m); ifq_set_oactive(&ifp->if_snd); @@ -1869,31 +1868,25 @@ re_start(struct ifnet *ifp) continue; } - /* now we are committed to transmit the packet */ ifq_deq_commit(&ifp->if_snd, m); - queued++; #if NBPFILTER > 0 - /* - * If there's a BPF listener, bounce a copy of this frame - * to him. - */ if (ifp->if_bpf) bpf_mtap_ether(ifp->if_bpf, m, BPF_DIRECTION_OUT); #endif + idx = RL_NEXT_TXQ(sc, idx); + txq_free--; } - if (queued == 0) + if (used == 0) return; - + + ifp->if_timer = 5; sc->rl_ldata.rl_txq_prodidx = idx; + atomic_sub_int(&sc->rl_ldata.rl_tx_free, used); + KASSERT(sc->rl_ldata.rl_tx_free >= 0); CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START); - - /* - * Set a timeout in case the chip goes out to lunch. - */ - ifp->if_timer = 5; } int @@ -2148,8 +2141,6 @@ re_stop(struct ifnet *ifp) ifp->if_flags &= ~IFF_RUNNING; ifq_clr_oactive(&ifp->if_snd); - mii_down(&sc->sc_mii); - /* * Disable accepting frames to put RX MAC into idle state. * Otherwise it's possible to get frames while stop command @@ -2195,6 +2186,10 @@ re_stop(struct ifnet *ifp) m_freem(sc->rl_head); sc->rl_head = sc->rl_tail = NULL; } + + intr_barrier(sc->sc_ih); + + mii_down(&sc->sc_mii); /* Free the TX list buffers. */ for (i = 0; i < RL_TX_QLEN; i++) { Index: pci/if_re_pci.c =================================================================== RCS file: /cvs/src/sys/dev/pci/if_re_pci.c,v retrieving revision 1.49 diff -u -p -r1.49 if_re_pci.c --- pci/if_re_pci.c 24 Nov 2015 17:11:39 -0000 1.49 +++ pci/if_re_pci.c 4 Dec 2015 15:07:02 -0000 @@ -156,8 +156,8 @@ re_pci_attach(struct device *parent, str return; } intrstr = pci_intr_string(pc, ih); - psc->sc_ih = pci_intr_establish(pc, ih, IPL_NET, re_intr, sc, - sc->sc_dev.dv_xname); + psc->sc_ih = pci_intr_establish(pc, ih, IPL_NET | IPL_MPSAFE, re_intr, + sc, sc->sc_dev.dv_xname); if (psc->sc_ih == NULL) { printf(": couldn't establish interrupt"); if (intrstr != NULL)