> Date: Fri, 19 Jun 2020 16:14:45 +1000 > From: David Gwynne <da...@gwynne.id.au> > > the network stack doesn't really block timeouts from firing anymore. this > is especially true on MP systems, because timeouts fire on cpu0 and the > nettq thread could be somewhere else entirely. this means network > activity doesn't make the softclock lose ticks, which means we aren't > scaling rx ring activity like we think we are. > > the alternative way to detect livelock is when a driver queues packets > for the stack to process, if there are too many packets built up then the > input routine return value tells the driver to slow down. this enables > finer-grained livelock detection too. the rx ring accounting is done per > rx ring, and each rx ring is tied to a specific nettq. if one of > them is going too fast it shouldn't affect the others. the tick > based detection was done system-wide and punished all the drivers. > > the diff below converts all the drivers to the new mechanism, and > removes the old one. > > i really need tests for this one. can someone try an affected nic > on armv7? other than that i think i'm mostly interested in em and bge > tests. i've been kicking bge a bit here on a sparc64, but the more the > merrier.
Tested with fec(4) on armv7 and dwge(4) on arm64. No noticeable bad effects; tcpbench throughput is not measurably different. > Index: dev/fdt/if_dwge.c > =================================================================== > RCS file: /cvs/src/sys/dev/fdt/if_dwge.c,v > retrieving revision 1.2 > diff -u -p -r1.2 if_dwge.c > --- dev/fdt/if_dwge.c 7 Oct 2019 00:40:04 -0000 1.2 > +++ dev/fdt/if_dwge.c 19 Jun 2020 03:57:17 -0000 > @@ -907,13 +907,15 @@ dwge_rx_proc(struct dwge_softc *sc) > sc->sc_rx_cons++; > } > > + if (ifiq_input(&ifp->if_rcv, &ml)) > + if_rxr_livelocked(&sc->sc_rx_ring); > + > dwge_fill_rx_ring(sc); > > bus_dmamap_sync(sc->sc_dmat, DWGE_DMA_MAP(sc->sc_rxring), 0, > DWGE_DMA_LEN(sc->sc_rxring), > BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); > > - if_input(ifp, &ml); > } > > void > Index: dev/fdt/if_dwxe.c > =================================================================== > RCS file: /cvs/src/sys/dev/fdt/if_dwxe.c,v > retrieving revision 1.15 > diff -u -p -r1.15 if_dwxe.c > --- dev/fdt/if_dwxe.c 7 Oct 2019 00:40:04 -0000 1.15 > +++ dev/fdt/if_dwxe.c 19 Jun 2020 03:57:17 -0000 > @@ -966,13 +966,14 @@ dwxe_rx_proc(struct dwxe_softc *sc) > sc->sc_rx_cons++; > } > > + if (ifiq_input(&ifp->if_rcv, &ml)) > + if_rxr_livelocked(&sc->sc_rx_ring); > + > dwxe_fill_rx_ring(sc); > > bus_dmamap_sync(sc->sc_dmat, DWXE_DMA_MAP(sc->sc_rxring), 0, > DWXE_DMA_LEN(sc->sc_rxring), > BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); > - > - if_input(ifp, &ml); > } > > void > Index: dev/fdt/if_fec.c > =================================================================== > RCS file: /cvs/src/sys/dev/fdt/if_fec.c,v > retrieving revision 1.8 > diff -u -p -r1.8 if_fec.c > --- dev/fdt/if_fec.c 6 Feb 2019 22:59:06 -0000 1.8 > +++ dev/fdt/if_fec.c 19 Jun 2020 03:57:17 -0000 > @@ -1123,6 +1123,9 @@ fec_rx_proc(struct fec_softc *sc) > sc->sc_rx_cons++; > } > > + if (ifiq_input(&ifp->if_rcv, &ml)) > + if_rxr_livelocked(&sc->sc_rx_ring); > + > fec_fill_rx_ring(sc); > > 
bus_dmamap_sync(sc->sc_dmat, ENET_DMA_MAP(sc->sc_rxring), 0, > @@ -1131,8 +1134,6 @@ fec_rx_proc(struct fec_softc *sc) > > /* rx descriptors are ready */ > HWRITE4(sc, ENET_RDAR, ENET_RDAR_RDAR); > - > - if_input(ifp, &ml); > } > > void > Index: dev/fdt/if_mvneta.c > =================================================================== > RCS file: /cvs/src/sys/dev/fdt/if_mvneta.c,v > retrieving revision 1.10 > diff -u -p -r1.10 if_mvneta.c > --- dev/fdt/if_mvneta.c 22 May 2020 10:02:30 -0000 1.10 > +++ dev/fdt/if_mvneta.c 19 Jun 2020 03:57:17 -0000 > @@ -1363,9 +1363,10 @@ mvneta_rx_proc(struct mvneta_softc *sc) > sc->sc_rx_cons = MVNETA_RX_RING_NEXT(idx); > } > > - mvneta_fill_rx_ring(sc); > + if (ifiq_input(&ifp->if_rcv, &ml)) > + if_rxr_livelocked(&sc->sc_rx_ring); > > - if_input(ifp, &ml); > + mvneta_fill_rx_ring(sc); > } > > void > Index: dev/ic/bcmgenet.c > =================================================================== > RCS file: /cvs/src/sys/dev/ic/bcmgenet.c,v > retrieving revision 1.1 > diff -u -p -r1.1 bcmgenet.c > --- dev/ic/bcmgenet.c 14 Apr 2020 21:02:39 -0000 1.1 > +++ dev/ic/bcmgenet.c 19 Jun 2020 03:57:17 -0000 > @@ -729,8 +729,10 @@ genet_rxintr(struct genet_softc *sc, int > sc->sc_rx.next = index; > sc->sc_rx.pidx = pidx; > > + if (ifiq_input(&ifp->if_rcv, &ml)) > + if_rxr_livelocked(&sc->sc_rx_ring); > + > genet_fill_rx_ring(sc, qid); > - if_input(ifp, &ml); > } > } > > Index: dev/ic/gem.c > =================================================================== > RCS file: /cvs/src/sys/dev/ic/gem.c,v > retrieving revision 1.123 > diff -u -p -r1.123 gem.c > --- dev/ic/gem.c 7 Feb 2018 22:35:14 -0000 1.123 > +++ dev/ic/gem.c 19 Jun 2020 03:57:17 -0000 > @@ -1020,6 +1020,9 @@ gem_rint(struct gem_softc *sc) > ml_enqueue(&ml, m); > } > > + if (ifiq_input(&ifp->if_rcv, &ml)) > + if_rxr_livelocked(&sc->sc_rx_ring); > + > /* Update the receive pointer. 
*/ > sc->sc_rx_cons = i; > gem_fill_rx_ring(sc); > @@ -1027,8 +1030,6 @@ gem_rint(struct gem_softc *sc) > > DPRINTF(sc, ("gem_rint: done sc->sc_rx_cons %d, complete %d\n", > sc->sc_rx_cons, bus_space_read_4(t, h, GEM_RX_COMPLETION))); > - > - if_input(ifp, &ml); > > return (1); > } > Index: dev/ic/hme.c > =================================================================== > RCS file: /cvs/src/sys/dev/ic/hme.c,v > retrieving revision 1.81 > diff -u -p -r1.81 hme.c > --- dev/ic/hme.c 22 Jan 2017 10:17:38 -0000 1.81 > +++ dev/ic/hme.c 19 Jun 2020 03:57:17 -0000 > @@ -844,7 +844,8 @@ hme_rint(struct hme_softc *sc) > ml_enqueue(&ml, m); > } > > - if_input(ifp, &ml); > + if (ifiq_input(&ifp->if_rcv, &ml)) > + if_rxr_livelocked(&sc->sc_rx_ring); > > sc->sc_rx_cons = ri; > hme_fill_rx_ring(sc); > Index: dev/ic/re.c > =================================================================== > RCS file: /cvs/src/sys/dev/ic/re.c,v > retrieving revision 1.204 > diff -u -p -r1.204 re.c > --- dev/ic/re.c 19 Nov 2019 06:34:10 -0000 1.204 > +++ dev/ic/re.c 19 Jun 2020 03:57:18 -0000 > @@ -1398,10 +1398,12 @@ re_rxeof(struct rl_softc *sc) > ml_enqueue(&ml, m); > } > > + if (ifiq_input(&ifp->if_rcv, &ml)) > + if_rxr_livelocked(&sc->rl_ldata.rl_rx_ring); > + > sc->rl_ldata.rl_rx_considx = i; > re_rx_list_fill(sc); > > - if_input(ifp, &ml); > > return (rx); > } > Index: dev/ic/xl.c > =================================================================== > RCS file: /cvs/src/sys/dev/ic/xl.c,v > retrieving revision 1.132 > diff -u -p -r1.132 xl.c > --- dev/ic/xl.c 22 Jan 2017 10:17:38 -0000 1.132 > +++ dev/ic/xl.c 19 Jun 2020 03:57:18 -0000 > @@ -1213,6 +1213,9 @@ again: > ml_enqueue(&ml, m); > } > > + if (ifiq_input(&ifp->if_rcv, &ml)) > + if_rxr_livelocked(&sc->xl_cdata.xl_rx_ring); > + > xl_fill_rx_ring(sc); > > /* > @@ -1235,8 +1238,6 @@ again: > xl_fill_rx_ring(sc); > goto again; > } > - > - if_input(ifp, &ml); > } > > /* > Index: dev/pci/if_bge.c > 
=================================================================== > RCS file: /cvs/src/sys/dev/pci/if_bge.c,v > retrieving revision 1.389 > diff -u -p -r1.389 if_bge.c > --- dev/pci/if_bge.c 18 Jun 2020 17:13:31 -0000 1.389 > +++ dev/pci/if_bge.c 19 Jun 2020 03:57:18 -0000 > @@ -3462,6 +3462,7 @@ bge_rxeof(struct bge_softc *sc) > bus_addr_t offset, toff; > bus_size_t tlen; > int tosync; > + int livelocked; > > rx_cons = sc->bge_rx_saved_considx; > rx_prod = sc->bge_rdata->bge_status_block.bge_idx[0].bge_rx_prod_idx; > @@ -3564,16 +3565,20 @@ bge_rxeof(struct bge_softc *sc) > > sc->bge_rx_saved_considx = rx_cons; > bge_writembx(sc, BGE_MBX_RX_CONS0_LO, sc->bge_rx_saved_considx); > + > + livelocked = ifiq_input(&ifp->if_rcv, &ml); > if (stdcnt) { > if_rxr_put(&sc->bge_std_ring, stdcnt); > + if (livelocked) > + if_rxr_livelocked(&sc->bge_std_ring); > bge_fill_rx_ring_std(sc); > } > if (jumbocnt) { > if_rxr_put(&sc->bge_jumbo_ring, jumbocnt); > + if (livelocked) > + if_rxr_livelocked(&sc->bge_jumbo_ring); > bge_fill_rx_ring_jumbo(sc); > } > - > - if_input(ifp, &ml); > } > > void > Index: dev/pci/if_bnx.c > =================================================================== > RCS file: /cvs/src/sys/dev/pci/if_bnx.c,v > retrieving revision 1.127 > diff -u -p -r1.127 if_bnx.c > --- dev/pci/if_bnx.c 17 May 2020 08:27:51 -0000 1.127 > +++ dev/pci/if_bnx.c 19 Jun 2020 03:57:18 -0000 > @@ -4467,6 +4467,9 @@ bnx_rx_int_next_rx: > BUS_SPACE_BARRIER_READ); > } > > + if (ifiq_input(&ifp->if_rcv, &ml)) > + if_rxr_livelocked(&sc->rx_ring); > + > /* No new packets to process. Refill the RX chain and exit. 
*/ > sc->rx_cons = sw_cons; > if (!bnx_fill_rx_chain(sc)) > @@ -4477,8 +4480,6 @@ bnx_rx_int_next_rx: > sc->rx_bd_chain_map[i], 0, > sc->rx_bd_chain_map[i]->dm_mapsize, > BUS_DMASYNC_PREWRITE); > - > - if_input(ifp, &ml); > > DBPRINT(sc, BNX_INFO_RECV, "%s(exit): rx_prod = 0x%04X, " > "rx_cons = 0x%04X, rx_prod_bseq = 0x%08X\n", > Index: dev/pci/if_bnxt.c > =================================================================== > RCS file: /cvs/src/sys/dev/pci/if_bnxt.c,v > retrieving revision 1.24 > diff -u -p -r1.24 if_bnxt.c > --- dev/pci/if_bnxt.c 9 Jun 2020 07:03:12 -0000 1.24 > +++ dev/pci/if_bnxt.c 19 Jun 2020 03:57:18 -0000 > @@ -1345,12 +1345,15 @@ bnxt_intr(void *xsc) > if_rxr_put(&sc->sc_rxr[0], rxfree); > if_rxr_put(&sc->sc_rxr[1], agfree); > > + if (ifiq_input(&sc->sc_ac.ac_if.if_rcv, &ml)) { > + if_rxr_livelocked(&sc->sc_rxr[0]); > + if_rxr_livelocked(&sc->sc_rxr[1]); > + } > + > bnxt_rx_fill(sc); > if ((sc->sc_rx_cons == sc->sc_rx_prod) || > (sc->sc_rx_ag_cons == sc->sc_rx_ag_prod)) > timeout_add(&sc->sc_rx_refill, 0); > - > - if_input(&sc->sc_ac.ac_if, &ml); > } > if (txfree != 0) { > if (ifq_is_oactive(&ifp->if_snd)) > Index: dev/pci/if_bwfm_pci.c > =================================================================== > RCS file: /cvs/src/sys/dev/pci/if_bwfm_pci.c,v > retrieving revision 1.36 > diff -u -p -r1.36 if_bwfm_pci.c > --- dev/pci/if_bwfm_pci.c 7 Mar 2020 09:56:46 -0000 1.36 > +++ dev/pci/if_bwfm_pci.c 19 Jun 2020 03:57:18 -0000 > @@ -1914,7 +1914,9 @@ bwfm_pci_intr(void *v) > bwfm_pci_ring_rx(sc, &sc->sc_rx_complete, &ml); > bwfm_pci_ring_rx(sc, &sc->sc_tx_complete, &ml); > bwfm_pci_ring_rx(sc, &sc->sc_ctrl_complete, &ml); > - if_input(ifp, &ml); > + > + if (ifiq_input(&ifp->if_rcv, &ml)) > + if_rxr_livelocked(&sc->sc_rxbuf_ring); > } > > #ifdef BWFM_DEBUG > Index: dev/pci/if_em.c > =================================================================== > RCS file: /cvs/src/sys/dev/pci/if_em.c,v > retrieving revision 1.353 > diff -u -p -r1.353 
if_em.c > --- dev/pci/if_em.c 9 Jun 2020 07:36:10 -0000 1.353 > +++ dev/pci/if_em.c 19 Jun 2020 03:57:18 -0000 > @@ -3008,7 +3008,8 @@ em_rxeof(struct em_queue *que) > > que->rx.sc_rx_desc_tail = i; > > - if_input(ifp, &ml); > + if (ifiq_input(&ifp->if_rcv, &ml)) > + if_rxr_livelocked(&que->rx.sc_rx_ring); > > return (rv); > } > Index: dev/pci/if_nep.c > =================================================================== > RCS file: /cvs/src/sys/dev/pci/if_nep.c,v > retrieving revision 1.31 > diff -u -p -r1.31 if_nep.c > --- dev/pci/if_nep.c 9 Nov 2018 14:14:31 -0000 1.31 > +++ dev/pci/if_nep.c 19 Jun 2020 03:57:18 -0000 > @@ -1049,7 +1049,8 @@ nep_rx_proc(struct nep_softc *sc) > bus_dmamap_sync(sc->sc_dmat, NEP_DMA_MAP(sc->sc_rcring), 0, > NEP_DMA_LEN(sc->sc_rcring), BUS_DMASYNC_PREREAD); > > - if_input(ifp, &ml); > + if (ifiq_input(&ifp->if_rcv, &ml)) > + if_rxr_livelocked(&sc->sc_rx_ring); > > nep_fill_rx_ring(sc); > > Index: dev/pci/if_oce.c > =================================================================== > RCS file: /cvs/src/sys/dev/pci/if_oce.c,v > retrieving revision 1.100 > diff -u -p -r1.100 if_oce.c > --- dev/pci/if_oce.c 27 Nov 2017 16:53:04 -0000 1.100 > +++ dev/pci/if_oce.c 19 Jun 2020 03:57:18 -0000 > @@ -1639,7 +1639,8 @@ oce_rxeof(struct oce_rq *rq, struct oce_ > ml_enqueue(&ml, m); > } > exit: > - if_input(ifp, &ml); > + if (ifiq_input(&ifp->if_rcv, &ml)) > + if_rxr_livelocked(&rq->rxring); > } > > void > Index: dev/pci/if_sis.c > =================================================================== > RCS file: /cvs/src/sys/dev/pci/if_sis.c,v > retrieving revision 1.135 > diff -u -p -r1.135 if_sis.c > --- dev/pci/if_sis.c 22 Jan 2017 10:17:38 -0000 1.135 > +++ dev/pci/if_sis.c 19 Jun 2020 03:57:18 -0000 > @@ -1447,7 +1447,8 @@ sis_rxeof(struct sis_softc *sc) > ml_enqueue(&ml, m); > } > > - if_input(ifp, &ml); > + if (ifiq_input(&ifp->if_rcv, &ml)) > + if_rxr_livelocked(&sc->sis_cdata.sis_rx_ring); > > sis_fill_rx_ring(sc); > } > Index: 
dev/pci/if_sk.c > =================================================================== > RCS file: /cvs/src/sys/dev/pci/if_sk.c,v > retrieving revision 1.189 > diff -u -p -r1.189 if_sk.c > --- dev/pci/if_sk.c 4 Jun 2017 04:29:23 -0000 1.189 > +++ dev/pci/if_sk.c 19 Jun 2020 03:57:18 -0000 > @@ -1637,9 +1637,11 @@ sk_rxeof(struct sk_if_softc *sc_if) > } > sc_if->sk_cdata.sk_rx_cons = cur; > > + if (ifiq_input(&ifp->if_rcv, &ml)) > + if_rxr_livelocked(rxr); > + > sk_fill_rx_ring(sc_if); > > - if_input(ifp, &ml); > } > > void > Index: dev/pci/if_vic.c > =================================================================== > RCS file: /cvs/src/sys/dev/pci/if_vic.c,v > retrieving revision 1.99 > diff -u -p -r1.99 if_vic.c > --- dev/pci/if_vic.c 9 Nov 2019 03:53:44 -0000 1.99 > +++ dev/pci/if_vic.c 19 Jun 2020 03:57:18 -0000 > @@ -867,7 +867,9 @@ vic_rx_proc(struct vic_softc *sc, int q) > VIC_INC(sc->sc_data->vd_rx[q].nextidx, sc->sc_nrxbuf); > } > > - if_input(ifp, &ml); > + if (ifiq_input(&ifp->if_rcv, &ml)) > + if_rxr_livelocked(&sc->sc_rxq[q].ring); > + > vic_rx_fill(sc, q); > > bus_dmamap_sync(sc->sc_dmat, sc->sc_dma_map, 0, sc->sc_dma_size, > Index: dev/pci/if_vr.c > =================================================================== > RCS file: /cvs/src/sys/dev/pci/if_vr.c,v > retrieving revision 1.153 > diff -u -p -r1.153 if_vr.c > --- dev/pci/if_vr.c 22 Jan 2017 10:17:38 -0000 1.153 > +++ dev/pci/if_vr.c 19 Jun 2020 03:57:18 -0000 > @@ -933,13 +933,14 @@ vr_rxeof(struct vr_softc *sc) > ml_enqueue(&ml, m); > } > > + if (ifiq_input(&ifp->if_rcv, &ml)) > + if_rxr_livelocked(&sc->sc_rxring); > + > vr_fill_rx_ring(sc); > > bus_dmamap_sync(sc->sc_dmat, sc->sc_listmap.vrm_map, > 0, sc->sc_listmap.vrm_map->dm_mapsize, > BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); > - > - if_input(ifp, &ml); > } > > void > Index: dev/pv/if_vio.c > =================================================================== > RCS file: /cvs/src/sys/dev/pv/if_vio.c,v > retrieving revision 1.16 > diff 
-u -p -r1.16 if_vio.c > --- dev/pv/if_vio.c 31 Dec 2019 10:05:33 -0000 1.16 > +++ dev/pv/if_vio.c 19 Jun 2020 03:57:18 -0000 > @@ -1039,7 +1039,9 @@ vio_rxeof(struct vio_softc *sc) > m_freem(m0); > } > > - if_input(ifp, &ml); > + if (ifiq_input(&ifp->if_rcv, &ml)) > + if_rxr_livelocked(&sc->sc_rx_ring); > + > return r; > } > > Index: kern/kern_sysctl.c > =================================================================== > RCS file: /cvs/src/sys/kern/kern_sysctl.c,v > retrieving revision 1.372 > diff -u -p -r1.372 kern_sysctl.c > --- kern/kern_sysctl.c 29 May 2020 01:22:53 -0000 1.372 > +++ kern/kern_sysctl.c 19 Jun 2020 03:57:18 -0000 > @@ -122,7 +122,6 @@ extern int nselcoll, fscale; > extern struct disklist_head disklist; > extern fixpt_t ccpu; > extern long numvnodes; > -extern u_int net_livelocks; > #if NAUDIO > 0 > extern int audio_record_enable; > #endif > @@ -644,7 +643,7 @@ kern_sysctl(int *name, u_int namelen, vo > dev = NODEV; > return sysctl_rdstruct(oldp, oldlenp, newp, &dev, sizeof(dev)); > case KERN_NETLIVELOCKS: > - return (sysctl_rdint(oldp, oldlenp, newp, net_livelocks)); > + return (sysctl_rdint(oldp, oldlenp, newp, 0)); > case KERN_POOL_DEBUG: { > int old_pool_debug = pool_debug; > > Index: net/if.c > =================================================================== > RCS file: /cvs/src/sys/net/if.c,v > retrieving revision 1.607 > diff -u -p -r1.607 if.c > --- net/if.c 17 Jun 2020 06:45:22 -0000 1.607 > +++ net/if.c 19 Jun 2020 03:57:18 -0000 > @@ -230,9 +230,6 @@ int if_cloners_count; > struct mutex if_hooks_mtx = MUTEX_INITIALIZER(IPL_NONE); > void if_hooks_run(struct task_list *); > > -struct timeout net_tick_to; > -void net_tick(void *); > -int net_livelocked(void); > int ifq_congestion; > > int netisr; > @@ -262,15 +259,11 @@ ifinit(void) > */ > if_idxmap_init(8); > > - timeout_set(&net_tick_to, net_tick, &net_tick_to); > - > for (i = 0; i < NET_TASKQ; i++) { > nettqmp[i] = taskq_create("softnet", 1, IPL_NET, TASKQ_MPSAFE); > if 
(nettqmp[i] == NULL) > panic("unable to create network taskq %d", i); > } > - > - net_tick(&net_tick_to); > } > > static struct if_idxmap if_idxmap = { > @@ -3179,30 +3260,6 @@ if_addrhooks_run(struct ifnet *ifp) > if_hooks_run(&ifp->if_addrhooks); > } > > -int net_ticks; > -u_int net_livelocks; > - > -void > -net_tick(void *null) > -{ > - extern int ticks; > - > - if (ticks - net_ticks > 1) > - net_livelocks++; > - > - net_ticks = ticks; > - > - timeout_add(&net_tick_to, 1); > -} > - > -int > -net_livelocked(void) > -{ > - extern int ticks; > - > - return (ticks - net_ticks > 1); > -} > - > void > if_rxr_init(struct if_rxring *rxr, u_int lwm, u_int hwm) > { > @@ -3220,12 +3277,7 @@ if_rxr_adjust_cwm(struct if_rxring *rxr) > { > extern int ticks; > > - if (net_livelocked()) { > - if (rxr->rxr_cwm > rxr->rxr_lwm) > - rxr->rxr_cwm--; > - else > - return; > - } else if (rxr->rxr_alive >= rxr->rxr_lwm) > + if (rxr->rxr_alive >= rxr->rxr_lwm) > return; > else if (rxr->rxr_cwm < rxr->rxr_hwm) > rxr->rxr_cwm++; > >