This brings bge(4) up to about where em(4) is: the interrupt handler is now
established with IPL_MPSAFE. It involves a few different changes, notably:
- per-ring refill timeouts, to ensure we don't try to refill a ring from the
timeout and an interrupt at the same time
- removing the list of tx dma maps and just assigning a map to use based
on the current ring slot number (this is why the diff removes more lines
than it adds)
- using atomics to adjust bge_txcnt, saving those adjustments until the
end of the tx/txeof loops, and only acting on the adjusted value
- not adding any mutexes
I've tested it on amd64 with these:
bge0 at pci1 dev 0 function 0 "Broadcom BCM5721" rev 0x21, BCM5750 C1 (0x4201):
msi, address 00:18:f3:d1:80:64
bge0 at pci2 dev 2 function 0 "Broadcom BCM5703X" rev 0x02, BCM5702/5703 A2
(0x1002): apic 3 int 1, address 00:09:3d:00:84:d1
and on sparc64:
bge0 at pci7 dev 4 function 0 "Broadcom BCM5714" rev 0xa3, BCM5715 A3 (0x9003):
ivec 0x795, address 00:14:4f:00:5a:5a
On the sparc64 box (a v245), with if_input_process unlocked, this gets me 10-20%
more pps, or about 100Mbps more in tcpbench (550, up from 450).
ok?
Index: if_bgereg.h
===================================================================
RCS file: /cvs/src/sys/dev/pci/if_bgereg.h,v
retrieving revision 1.127
diff -u -p -u -p -r1.127 if_bgereg.h
--- if_bgereg.h 11 Sep 2015 13:02:28 -0000 1.127
+++ if_bgereg.h 30 Sep 2015 11:14:09 -0000
@@ -2830,11 +2830,6 @@ struct bge_type {
#define BGE_TIMEOUT 100000
#define BGE_TXCONS_UNSET 0xFFFF /* impossible value */
-struct txdmamap_pool_entry {
- bus_dmamap_t dmamap;
- SLIST_ENTRY(txdmamap_pool_entry) link;
-};
-
#define ASF_ENABLE 1
#define ASF_NEW_HANDSHAKE 2
#define ASF_STACKUP 4
@@ -2934,11 +2929,11 @@ struct bge_softc {
int bge_txcnt;
struct timeout bge_timeout;
struct timeout bge_rxtimeout;
+ struct timeout bge_rxtimeout_jumbo;
u_int32_t bge_rx_discards;
u_int32_t bge_tx_discards;
u_int32_t bge_rx_inerrors;
u_int32_t bge_rx_overruns;
u_int32_t bge_tx_collisions;
- SLIST_HEAD(, txdmamap_pool_entry) txdma_list;
- struct txdmamap_pool_entry *txdma[BGE_TX_RING_CNT];
+ bus_dmamap_t bge_txdma[BGE_TX_RING_CNT];
};
Index: if_bge.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/if_bge.c,v
retrieving revision 1.369
diff -u -p -u -p -r1.369 if_bge.c
--- if_bge.c 19 Jul 2015 06:28:12 -0000 1.369
+++ if_bge.c 5 Oct 2015 01:06:21 -0000
@@ -141,7 +141,7 @@ void bge_tick(void *);
void bge_stats_update(struct bge_softc *);
void bge_stats_update_regs(struct bge_softc *);
int bge_cksum_pad(struct mbuf *);
-int bge_encap(struct bge_softc *, struct mbuf *, u_int32_t *);
+int bge_encap(struct bge_softc *, struct mbuf *, int *);
int bge_compact_dma_runt(struct mbuf *);
int bge_intr(void *);
@@ -1262,20 +1262,31 @@ uncreate:
return (1);
}
+/*
+ * When the refill timeout for a ring is active, that ring is so empty
+ * that no more packets can be received on it, so the interrupt handler
+ * will not attempt to refill it, meaning we don't need to protect against
+ * interrupts here.
+ */
+
void
bge_rxtick(void *arg)
{
struct bge_softc *sc = arg;
- int s;
- s = splnet();
if (ISSET(sc->bge_flags, BGE_RXRING_VALID) &&
if_rxr_inuse(&sc->bge_std_ring) <= 8)
bge_fill_rx_ring_std(sc);
+}
+
+void
+bge_rxtick_jumbo(void *arg)
+{
+ struct bge_softc *sc = arg;
+
if (ISSET(sc->bge_flags, BGE_JUMBO_RXRING_VALID) &&
if_rxr_inuse(&sc->bge_jumbo_ring) <= 8)
bge_fill_rx_ring_jumbo(sc);
- splx(s);
}
void
@@ -1410,7 +1421,7 @@ bge_fill_rx_ring_jumbo(struct bge_softc
* that now, then try again later.
*/
if (if_rxr_inuse(&sc->bge_jumbo_ring) <= 8)
- timeout_add(&sc->bge_rxtimeout, 1);
+ timeout_add(&sc->bge_rxtimeout_jumbo, 1);
}
void
@@ -1446,7 +1457,6 @@ void
bge_free_tx_ring(struct bge_softc *sc)
{
int i;
- struct txdmamap_pool_entry *dma;
if (!(sc->bge_flags & BGE_TXRING_VALID))
return;
@@ -1455,18 +1465,12 @@ bge_free_tx_ring(struct bge_softc *sc)
if (sc->bge_cdata.bge_tx_chain[i] != NULL) {
m_freem(sc->bge_cdata.bge_tx_chain[i]);
sc->bge_cdata.bge_tx_chain[i] = NULL;
- SLIST_INSERT_HEAD(&sc->txdma_list, sc->txdma[i],
- link);
- sc->txdma[i] = 0;
+ sc->bge_cdata.bge_tx_map[i] = NULL;
}
bzero(&sc->bge_rdata->bge_tx_ring[i],
sizeof(struct bge_tx_bd));
- }
- while ((dma = SLIST_FIRST(&sc->txdma_list))) {
- SLIST_REMOVE_HEAD(&sc->txdma_list, link);
- bus_dmamap_destroy(sc->bge_dmatag, dma->dmamap);
- free(dma, M_DEVBUF, 0);
+ bus_dmamap_destroy(sc->bge_dmatag, sc->bge_txdma[i]);
}
sc->bge_flags &= ~BGE_TXRING_VALID;
@@ -1476,9 +1480,7 @@ int
bge_init_tx_ring(struct bge_softc *sc)
{
int i;
- bus_dmamap_t dmamap;
bus_size_t txsegsz, txmaxsegsz;
- struct txdmamap_pool_entry *dma;
if (sc->bge_flags & BGE_TXRING_VALID)
return (0);
@@ -1505,22 +1507,10 @@ bge_init_tx_ring(struct bge_softc *sc)
txmaxsegsz = MCLBYTES;
}
- SLIST_INIT(&sc->txdma_list);
for (i = 0; i < BGE_TX_RING_CNT; i++) {
if (bus_dmamap_create(sc->bge_dmatag, txmaxsegsz,
- BGE_NTXSEG, txsegsz, 0, BUS_DMA_NOWAIT, &dmamap))
+ BGE_NTXSEG, txsegsz, 0, BUS_DMA_NOWAIT, &sc->bge_txdma[i]))
return (ENOBUFS);
- if (dmamap == NULL)
- panic("dmamap NULL in bge_init_tx_ring");
- dma = malloc(sizeof(*dma), M_DEVBUF, M_NOWAIT);
- if (dma == NULL) {
- printf("%s: can't alloc txdmamap_pool_entry\n",
- sc->bge_dev.dv_xname);
- bus_dmamap_destroy(sc->bge_dmatag, dmamap);
- return (ENOMEM);
- }
- dma->dmamap = dmamap;
- SLIST_INSERT_HEAD(&sc->txdma_list, dma, link);
}
sc->bge_flags |= BGE_TXRING_VALID;
@@ -3081,8 +3071,8 @@ bge_attach(struct device *parent, struct
/* Hookup IRQ last. */
DPRINTFN(5, ("pci_intr_establish\n"));
- sc->bge_intrhand = pci_intr_establish(pc, ih, IPL_NET, bge_intr, sc,
- sc->bge_dev.dv_xname);
+ sc->bge_intrhand = pci_intr_establish(pc, ih, IPL_NET | IPL_MPSAFE,
+ bge_intr, sc, sc->bge_dev.dv_xname);
if (sc->bge_intrhand == NULL) {
printf(": couldn't establish interrupt");
if (intrstr != NULL)
@@ -3139,6 +3129,7 @@ bge_attach(struct device *parent, struct
timeout_set(&sc->bge_timeout, bge_tick, sc);
timeout_set(&sc->bge_rxtimeout, bge_rxtick, sc);
+ timeout_set(&sc->bge_rxtimeout_jumbo, bge_rxtick_jumbo, sc);
return;
fail_6:
@@ -3578,15 +3569,17 @@ bge_txeof(struct bge_softc *sc)
{
struct bge_tx_bd *cur_tx = NULL;
struct ifnet *ifp;
- struct txdmamap_pool_entry *dma;
+ bus_dmamap_t dmamap;
bus_addr_t offset, toff;
bus_size_t tlen;
- int tosync;
+ int tosync, freed, txcnt;
+ u_int32_t cons, newcons;
struct mbuf *m;
/* Nothing to do */
- if (sc->bge_tx_saved_considx ==
- sc->bge_rdata->bge_status_block.bge_idx[0].bge_tx_cons_idx)
+ cons = sc->bge_tx_saved_considx;
+ newcons = sc->bge_rdata->bge_status_block.bge_idx[0].bge_tx_cons_idx;
+ if (cons == newcons)
return;
ifp = &sc->arpcom.ac_if;
@@ -3597,14 +3590,12 @@ bge_txeof(struct bge_softc *sc)
BUS_DMASYNC_POSTREAD);
offset = offsetof(struct bge_ring_data, bge_tx_ring);
- tosync = sc->bge_rdata->bge_status_block.bge_idx[0].bge_tx_cons_idx -
- sc->bge_tx_saved_considx;
+ tosync = newcons - cons;
- toff = offset + (sc->bge_tx_saved_considx * sizeof (struct bge_tx_bd));
+ toff = offset + (cons * sizeof (struct bge_tx_bd));
if (tosync < 0) {
- tlen = (BGE_TX_RING_CNT - sc->bge_tx_saved_considx) *
- sizeof (struct bge_tx_bd);
+ tlen = (BGE_TX_RING_CNT - cons) * sizeof (struct bge_tx_bd);
bus_dmamap_sync(sc->bge_dmatag, sc->bge_ring_map,
toff, tlen, BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE);
tosync = -tosync;
@@ -3618,34 +3609,35 @@ bge_txeof(struct bge_softc *sc)
* Go through our tx ring and free mbufs for those
* frames that have been sent.
*/
- while (sc->bge_tx_saved_considx !=
- sc->bge_rdata->bge_status_block.bge_idx[0].bge_tx_cons_idx) {
- u_int32_t idx = 0;
-
- idx = sc->bge_tx_saved_considx;
- cur_tx = &sc->bge_rdata->bge_tx_ring[idx];
+ freed = 0;
+ while (cons != newcons) {
+ cur_tx = &sc->bge_rdata->bge_tx_ring[cons];
if (cur_tx->bge_flags & BGE_TXBDFLAG_END)
ifp->if_opackets++;
- m = sc->bge_cdata.bge_tx_chain[idx];
+ m = sc->bge_cdata.bge_tx_chain[cons];
if (m != NULL) {
- sc->bge_cdata.bge_tx_chain[idx] = NULL;
- dma = sc->txdma[idx];
- bus_dmamap_sync(sc->bge_dmatag, dma->dmamap, 0,
- dma->dmamap->dm_mapsize, BUS_DMASYNC_POSTWRITE);
- bus_dmamap_unload(sc->bge_dmatag, dma->dmamap);
- SLIST_INSERT_HEAD(&sc->txdma_list, dma, link);
- sc->txdma[idx] = NULL;
+ dmamap = sc->bge_cdata.bge_tx_map[cons];
+
+ sc->bge_cdata.bge_tx_chain[cons] = NULL;
+ sc->bge_cdata.bge_tx_map[cons] = NULL;
+ bus_dmamap_sync(sc->bge_dmatag, dmamap, 0,
+ dmamap->dm_mapsize, BUS_DMASYNC_POSTWRITE);
+ bus_dmamap_unload(sc->bge_dmatag, dmamap);
m_freem(m);
}
- sc->bge_txcnt--;
- BGE_INC(sc->bge_tx_saved_considx, BGE_TX_RING_CNT);
+ freed++;
+ BGE_INC(cons, BGE_TX_RING_CNT);
}
- if (sc->bge_txcnt < BGE_TX_RING_CNT - 16)
+ txcnt = atomic_sub_int_nv(&sc->bge_txcnt, freed);
+
+ if (txcnt < BGE_TX_RING_CNT - 16)
ifp->if_flags &= ~IFF_OACTIVE;
- if (sc->bge_txcnt == 0)
+ if (txcnt == 0)
ifp->if_timer = 0;
+
+ sc->bge_tx_saved_considx = cons;
}
int
@@ -3693,8 +3685,11 @@ bge_intr(void *xsc)
if (BGE_ASICREV(sc->bge_chipid) == BGE_ASICREV_BCM5700 ||
statusword & BGE_STATFLAG_LINKSTATE_CHANGED ||
- BGE_STS_BIT(sc, BGE_STS_LINK_EVT))
+ BGE_STS_BIT(sc, BGE_STS_LINK_EVT)) {
+ KERNEL_LOCK();
bge_link_upd(sc);
+ KERNEL_UNLOCK();
+ }
/* Re-enable interrupts. */
bge_writembx(sc, BGE_MBX_IRQ0_LO, statustag);
@@ -3706,8 +3701,11 @@ bge_intr(void *xsc)
/* Check TX ring producer/consumer */
bge_txeof(sc);
- if (!IFQ_IS_EMPTY(&ifp->if_snd))
+ if (!IFQ_IS_EMPTY(&ifp->if_snd)) {
+ KERNEL_LOCK();
bge_start(ifp);
+ KERNEL_UNLOCK();
+ }
}
return (1);
@@ -3987,16 +3985,15 @@ bge_cksum_pad(struct mbuf *m)
* pointers to descriptors.
*/
int
-bge_encap(struct bge_softc *sc, struct mbuf *m_head, u_int32_t *txidx)
+bge_encap(struct bge_softc *sc, struct mbuf *m_head, int *txinc)
{
struct bge_tx_bd *f = NULL;
u_int32_t frag, cur;
u_int16_t csum_flags = 0;
- struct txdmamap_pool_entry *dma;
- bus_dmamap_t dmamap;
+ bus_dmamap_t dmamap;
int i = 0;
- cur = frag = *txidx;
+ cur = frag = (sc->bge_tx_prodidx + *txinc) % BGE_TX_RING_CNT;
if (m_head->m_pkthdr.csum_flags) {
if (m_head->m_pkthdr.csum_flags & M_IPV4_CSUM_OUT)
@@ -4026,10 +4023,7 @@ bge_encap(struct bge_softc *sc, struct m
return (ENOBUFS);
doit:
- dma = SLIST_FIRST(&sc->txdma_list);
- if (dma == NULL)
- return (ENOBUFS);
- dmamap = dma->dmamap;
+ dmamap = sc->bge_txdma[cur];
/*
* Start packing the mbufs in this chain into
@@ -4052,7 +4046,7 @@ doit:
}
/* Check if we have enough free send BDs. */
- if (sc->bge_txcnt + dmamap->dm_nsegs >= BGE_TX_RING_CNT)
+ if (sc->bge_txcnt + *txinc + dmamap->dm_nsegs >= BGE_TX_RING_CNT)
goto fail_unload;
for (i = 0; i < dmamap->dm_nsegs; i++) {
@@ -4084,11 +4078,9 @@ doit:
sc->bge_rdata->bge_tx_ring[cur].bge_flags |= BGE_TXBDFLAG_END;
sc->bge_cdata.bge_tx_chain[cur] = m_head;
- SLIST_REMOVE_HEAD(&sc->txdma_list, link);
- sc->txdma[cur] = dma;
- sc->bge_txcnt += dmamap->dm_nsegs;
-
- *txidx = frag;
+ sc->bge_cdata.bge_tx_map[cur] = dmamap;
+
+ *txinc += dmamap->dm_nsegs;
return (0);
@@ -4107,8 +4099,7 @@ bge_start(struct ifnet *ifp)
{
struct bge_softc *sc;
struct mbuf *m_head;
- u_int32_t prodidx;
- int pkts;
+ int txinc;
sc = ifp->if_softc;
@@ -4117,55 +4108,44 @@ bge_start(struct ifnet *ifp)
if (!BGE_STS_BIT(sc, BGE_STS_LINK))
return;
- prodidx = sc->bge_tx_prodidx;
-
- for (pkts = 0; !IFQ_IS_EMPTY(&ifp->if_snd);) {
- if (sc->bge_txcnt > BGE_TX_RING_CNT - 16) {
- ifp->if_flags |= IFF_OACTIVE;
- break;
- }
-
+ txinc = 0;
+ while (1) {
IFQ_POLL(&ifp->if_snd, m_head);
if (m_head == NULL)
break;
- /*
- * Pack the data into the transmit ring. If we
- * don't have room, set the OACTIVE flag and wait
- * for the NIC to drain the ring.
- */
- if (bge_encap(sc, m_head, &prodidx)) {
- ifp->if_flags |= IFF_OACTIVE;
+ if (bge_encap(sc, m_head, &txinc))
break;
- }
/* now we are committed to transmit the packet */
IFQ_DEQUEUE(&ifp->if_snd, m_head);
- pkts++;
#if NBPFILTER > 0
- /*
- * If there's a BPF listener, bounce a copy of this frame
- * to him.
- */
if (ifp->if_bpf)
bpf_mtap_ether(ifp->if_bpf, m_head, BPF_DIRECTION_OUT);
#endif
}
- if (pkts == 0)
- return;
- /* Transmit */
- bge_writembx(sc, BGE_MBX_TX_HOST_PROD0_LO, prodidx);
- if (BGE_CHIPREV(sc->bge_chipid) == BGE_CHIPREV_5700_BX)
- bge_writembx(sc, BGE_MBX_TX_HOST_PROD0_LO, prodidx);
+ if (txinc != 0) {
+ int txcnt;
- sc->bge_tx_prodidx = prodidx;
+ /* Transmit */
+ sc->bge_tx_prodidx = (sc->bge_tx_prodidx + txinc) %
+ BGE_TX_RING_CNT;
+ bge_writembx(sc, BGE_MBX_TX_HOST_PROD0_LO, sc->bge_tx_prodidx);
+ if (BGE_CHIPREV(sc->bge_chipid) == BGE_CHIPREV_5700_BX)
+ bge_writembx(sc, BGE_MBX_TX_HOST_PROD0_LO,
+ sc->bge_tx_prodidx);
- /*
- * Set a timeout in case the chip goes out to lunch.
- */
- ifp->if_timer = 5;
+ txcnt = atomic_add_int_nv(&sc->bge_txcnt, txinc);
+ if (txcnt > BGE_TX_RING_CNT - 16)
+ ifp->if_flags |= IFF_OACTIVE;
+
+ /*
+ * Set a timeout in case the chip goes out to lunch.
+ */
+ ifp->if_timer = 5;
+ }
}
void
@@ -4580,6 +4560,7 @@ bge_stop(struct bge_softc *sc)
timeout_del(&sc->bge_timeout);
timeout_del(&sc->bge_rxtimeout);
+ timeout_del(&sc->bge_rxtimeout_jumbo);
ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
@@ -4639,6 +4620,8 @@ bge_stop(struct bge_softc *sc)
* Tell firmware we're shutting down.
*/
BGE_CLRBIT(sc, BGE_MODE_CTL, BGE_MODECTL_STACKUP);
+
+ intr_barrier(sc->bge_intrhand);
/* Free the RX lists. */
bge_free_rx_ring_std(sc);