On Tue, 2 Feb 2016, Stefan Sperling wrote:  
> > On Sat, Jan 30, 2016 at 10:49:38PM +1300, Richard Procter wrote:
> > -           ring->queued--;
> > +           atomic_dec_int(&ring->queued);
> 
> > -   ring->queued += ntxds;
> > +   atomic_add_int(&ring->queued, ntxds);
> 
> I don't think these make a difference in the current way of things.
> Wireless drivers run interrupts under the kernel big lock, interrupts
> aren't preemptible, and AFAIK (most?) 32bit integer operations are atomic.  
[...]
> Hmm. Taking a closer look, if_start() is already called under splnet.
> So adding splnet to rt2860_tx() shouldn't make a difference.

You're right, the atomic is unnecessary. I'd assumed rt2860_tx() was 
running under splsoftnet. I could have sworn I'd seen errors without the 
atomic but that now tests fine, too. (I still see errors without the 
ring->cur fix.)

> This also means the card cannot interrupt in the way your comment
> describes, i.e. the problem you're "fixing" here cannot exist... ?

Also right.

This simplifies things --- see below for the patch minus the above.  
Without it my card under stress sees 1 oerror per ~217 packets; it's 
now sent 5E6 without seeing any.

cheers, 
Richard. 

-------------------------------
* fix watchdog timeouts and dropped frames under load. 

- on full tx ring, ring->cur wraps to an active tx descriptor. Passing 
that wrapped value to the card was observed to cause general flakiness.

Fix prevents the wrap at the cost of reducing usable tx descriptors by 
one.

Index: sys/dev/ic/rt2860.c
===================================================================
--- sys.orig/dev/ic/rt2860.c
+++ sys/dev/ic/rt2860.c
@@ -1171,7 +1171,7 @@ rt2860_tx_intr(struct rt2860_softc *sc,
        }
 
        sc->sc_tx_timer = 0;
-       if (ring->queued < RT2860_TX_RING_COUNT)
+       if (ring->queued < RT2860_TX_RING_MAX)
                sc->qfullmsk &= ~(1 << qid);
        ifq_clr_oactive(&ifp->if_snd);
        rt2860_start(ifp);
@@ -1618,7 +1618,7 @@ rt2860_tx(struct rt2860_softc *sc, struc
                /* determine how many TXDs are required */
                ntxds = 1 + (data->map->dm_nsegs / 2);
 
-               if (ring->queued + ntxds >= RT2860_TX_RING_COUNT) {
+               if (ring->queued + ntxds >= RT2860_TX_RING_MAX) {
                        /* not enough free TXDs, force mbuf defrag */
                        bus_dmamap_unload(sc->sc_dmat, data->map);
                        error = EFBIG;
@@ -1656,7 +1656,7 @@ rt2860_tx(struct rt2860_softc *sc, struc
                /* determine how many TXDs are now required */
                ntxds = 1 + (data->map->dm_nsegs / 2);
 
-               if (ring->queued + ntxds >= RT2860_TX_RING_COUNT) {
+               if (ring->queued + ntxds >= RT2860_TX_RING_MAX) {
                        /* this is a hopeless case, drop the mbuf! */
                        bus_dmamap_unload(sc->sc_dmat, data->map);
                        m_freem(m);
@@ -1714,7 +1714,7 @@ rt2860_tx(struct rt2860_softc *sc, struc
 
        ring->cur = (ring->cur + 1) % RT2860_TX_RING_COUNT;
        ring->queued += ntxds;
-       if (ring->queued >= RT2860_TX_RING_COUNT)
+       if (ring->queued >= RT2860_TX_RING_MAX)
                sc->qfullmsk |= 1 << qid;
 
        /* kick Tx */
Index: sys/dev/ic/rt2860var.h
===================================================================
--- sys.orig/dev/ic/rt2860var.h
+++ sys/dev/ic/rt2860var.h
@@ -17,8 +17,9 @@
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  */
 
-#define RT2860_TX_RING_COUNT   64
 #define RT2860_RX_RING_COUNT   128
+#define RT2860_TX_RING_COUNT   64
+#define RT2860_TX_RING_MAX     (RT2860_TX_RING_COUNT - 1)
 #define RT2860_TX_POOL_COUNT   (RT2860_TX_RING_COUNT * 2)
 
 #define RT2860_MAX_SCATTER     ((RT2860_TX_RING_COUNT * 2) - 1)

-------------------------------
* replace custom defrag with m_defrag() 

- This fixes an error in the existing code: the "hopeless case" guard
is equivalent to 'ring now full', so oactive is never set; instead, the code
drops any mbuf that would fill the ring. This occurs often in practice.

- The preceding patch allows the ring to fill safely. 

- The new code avoids some hoop-jumping. Currently, a tx dma-map can map 
an entire tx ring. Therefore an mbuf that fits a dma-map may yet not fit 
into the tx ring's remaining space.  To be sure it can, we must in general 
count the mbuf's fragments and, if necessary, defrag it and reload the 
dmamap.

The new code limits the dmamap to cover at most 8 tx descriptors (= 15
fragments): now, if an mbuf fits a dma-map it will fit any ring with at least
8 free descriptors. So we need only check for 8 free descriptors and are
no longer obliged to count fragments and jump hoops. The cost is unused tx
ring descriptors, at most 7 of 63, when a one-fragment mbuf occupies one
descriptor in the last block of 8.

- For simplicity on error return, shift responsibility for calling
m_freem() to rt2860_tx()'s caller (which already calls
ieee80211_release_node()).

Index: sys/dev/ic/rt2860.c
===================================================================
--- sys.orig/dev/ic/rt2860.c
+++ sys/dev/ic/rt2860.c
@@ -1171,7 +1171,7 @@ rt2860_tx_intr(struct rt2860_softc *sc,
        }
 
        sc->sc_tx_timer = 0;
-       if (ring->queued < RT2860_TX_RING_MAX)
+       if (ring->queued < RT2860_TX_RING_FULL)
                sc->qfullmsk &= ~(1 << qid);
        ifq_clr_oactive(&ifp->if_snd);
        rt2860_start(ifp);
@@ -1481,12 +1481,11 @@ rt2860_tx(struct rt2860_softc *sc, struc
        struct rt2860_txd *txd;
        struct rt2860_txwi *txwi;
        struct ieee80211_frame *wh;
-       struct mbuf *m1;
        bus_dma_segment_t *seg;
        u_int hdrlen;
        uint16_t qos, dur;
        uint8_t type, qsel, mcs, pid, tid, qid;
-       int nsegs, ntxds, hasqos, ridx, ctl_ridx, error;
+       int nsegs, hasqos, ridx, ctl_ridx;
 
        /* the data pool contains at least one element, pick the first */
        data = SLIST_FIRST(&sc->data_pool);
@@ -1606,62 +1605,14 @@ rt2860_tx(struct rt2860_softc *sc, struc
        memcpy(txwi + 1, wh, hdrlen);
        m_adj(m, hdrlen);
 
-       error = bus_dmamap_load_mbuf(sc->sc_dmat, data->map, m,
-           BUS_DMA_NOWAIT);
-       if (__predict_false(error != 0 && error != EFBIG)) {
-               printf("%s: can't map mbuf (error %d)\n",
-                   sc->sc_dev.dv_xname, error);
-               m_freem(m);
-               return error;
-       }
-       if (__predict_true(error == 0)) {
-               /* determine how many TXDs are required */
-               ntxds = 1 + (data->map->dm_nsegs / 2);
-
-               if (ring->queued + ntxds >= RT2860_TX_RING_MAX) {
-                       /* not enough free TXDs, force mbuf defrag */
-                       bus_dmamap_unload(sc->sc_dmat, data->map);
-                       error = EFBIG;
-               }
-       }
-       if (__predict_false(error != 0)) {
-               /* too many fragments, linearize */
-               MGETHDR(m1, M_DONTWAIT, MT_DATA);
-               if (m1 == NULL) {
-                       m_freem(m);
-                       return ENOBUFS;
-               }
-               if (m->m_pkthdr.len > MHLEN) {
-                       MCLGET(m1, M_DONTWAIT);
-                       if (!(m1->m_flags & M_EXT)) {
-                               m_freem(m);
-                               m_freem(m1);
-                               return ENOBUFS;
-                       }
-               }
-               m_copydata(m, 0, m->m_pkthdr.len, mtod(m1, caddr_t));
-               m1->m_pkthdr.len = m1->m_len = m->m_pkthdr.len;
-               m_freem(m);
-               m = m1;
+       KASSERT (ring->queued < RT2860_TX_RING_FULL);
 
-               error = bus_dmamap_load_mbuf(sc->sc_dmat, data->map, m,
-                   BUS_DMA_NOWAIT);
-               if (__predict_false(error != 0)) {
-                       printf("%s: can't map mbuf (error %d)\n",
-                           sc->sc_dev.dv_xname, error);
-                       m_freem(m);
-                       return error;
-               }
-
-               /* determine how many TXDs are now required */
-               ntxds = 1 + (data->map->dm_nsegs / 2);
-
-               if (ring->queued + ntxds >= RT2860_TX_RING_MAX) {
-                       /* this is a hopeless case, drop the mbuf! */
-                       bus_dmamap_unload(sc->sc_dmat, data->map);
-                       m_freem(m);
-                       return ENOBUFS;
-               }
+       if (bus_dmamap_load_mbuf(sc->sc_dmat, data->map, m, BUS_DMA_NOWAIT)) {
+               if (m_defrag(m, M_DONTWAIT))
+                       return (ENOBUFS);
+               if (bus_dmamap_load_mbuf(sc->sc_dmat,
+                   data->map, m, BUS_DMA_NOWAIT))
+                       return (EFBIG);
        }
 
        qsel = (qid < EDCA_NUM_AC) ? RT2860_TX_QSEL_EDCA : RT2860_TX_QSEL_MGMT;
@@ -1713,10 +1664,12 @@ rt2860_tx(struct rt2860_softc *sc, struc
            qid, txwi->wcid, data->map->dm_nsegs, ridx));
 
        ring->cur = (ring->cur + 1) % RT2860_TX_RING_COUNT;
-       ring->queued += ntxds;
-       if (ring->queued >= RT2860_TX_RING_MAX)
+       ring->queued += 1 + (data->map->dm_nsegs / 2);
+       if (ring->queued >= RT2860_TX_RING_FULL)
                sc->qfullmsk |= 1 << qid;
 
+       KASSERT (ring->queued <= RT2860_TX_RING_MAX);
+
        /* kick Tx */
        RAL_WRITE(sc, RT2860_TX_CTX_IDX(qid), ring->cur);
 
@@ -1771,6 +1724,7 @@ sendit:
                        bpf_mtap(ic->ic_rawbpf, m, BPF_DIRECTION_OUT);
 #endif
                if (rt2860_tx(sc, m, ni) != 0) {
+                       m_freem(m);
                        ieee80211_release_node(ic, ni);
                        ifp->if_oerrors++;
                        continue;
Index: sys/dev/ic/rt2860var.h
===================================================================
--- sys.orig/dev/ic/rt2860var.h
+++ sys/dev/ic/rt2860var.h
@@ -17,13 +17,15 @@
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  */
 
+#define RT2860_MAX_SCATTER     15
+#define RT2860_MAX_SCATTER_TXD (1 + (RT2860_MAX_SCATTER / 2))
+
 #define RT2860_RX_RING_COUNT   128
 #define RT2860_TX_RING_COUNT   64
 #define RT2860_TX_RING_MAX     (RT2860_TX_RING_COUNT - 1)
+#define RT2860_TX_RING_FULL    (RT2860_TX_RING_MAX - RT2860_MAX_SCATTER_TXD)
 #define RT2860_TX_POOL_COUNT   (RT2860_TX_RING_COUNT * 2)
 
-#define RT2860_MAX_SCATTER     ((RT2860_TX_RING_COUNT * 2) - 1)
-
 /* HW supports up to 255 STAs */
 #define RT2860_WCID_MAX                254
 #define RT2860_AID2WCID(aid)   ((aid) & 0xff)

Reply via email to