the network stack doesn't really block timeouts from firing anymore. this is especially true on MP systems, because timeouts fire on cpu0 and the nettq thread could be somewhere else entirely. this means network activity doesn't make the softclock lose ticks, so the tick based livelock detection rarely if ever triggers, which means we aren't scaling rx ring activity like we think we are.
the alternative way to detect livelock is at the point where a driver queues packets for the stack to process: if too many packets have built up, the return value of the input routine tells the driver to slow down. this enables finer grained livelock detection too. the rx ring accounting is done per rx ring, and each rx ring is tied to a specific nettq. if one of them is going too fast it shouldn't affect the others. the tick based detection was done system wide and punished all the drivers. the diff below converts all the drivers to the new mechanism, and removes the old one. i really need tests for this one. can someone try an affected nic on armv7? other than that i think i'm mostly interested in em and bge tests. i've been kicking bge a bit here on a sparc64, but the more the merrier. Index: dev/fdt/if_dwge.c =================================================================== RCS file: /cvs/src/sys/dev/fdt/if_dwge.c,v retrieving revision 1.2 diff -u -p -r1.2 if_dwge.c --- dev/fdt/if_dwge.c 7 Oct 2019 00:40:04 -0000 1.2 +++ dev/fdt/if_dwge.c 19 Jun 2020 03:57:17 -0000 @@ -907,13 +907,15 @@ dwge_rx_proc(struct dwge_softc *sc) sc->sc_rx_cons++; } + if (ifiq_input(&ifp->if_rcv, &ml)) + if_rxr_livelocked(&sc->sc_rx_ring); + dwge_fill_rx_ring(sc); bus_dmamap_sync(sc->sc_dmat, DWGE_DMA_MAP(sc->sc_rxring), 0, DWGE_DMA_LEN(sc->sc_rxring), BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - if_input(ifp, &ml); } void Index: dev/fdt/if_dwxe.c =================================================================== RCS file: /cvs/src/sys/dev/fdt/if_dwxe.c,v retrieving revision 1.15 diff -u -p -r1.15 if_dwxe.c --- dev/fdt/if_dwxe.c 7 Oct 2019 00:40:04 -0000 1.15 +++ dev/fdt/if_dwxe.c 19 Jun 2020 03:57:17 -0000 @@ -966,13 +966,14 @@ dwxe_rx_proc(struct dwxe_softc *sc) sc->sc_rx_cons++; } + if (ifiq_input(&ifp->if_rcv, &ml)) + if_rxr_livelocked(&sc->sc_rx_ring); + dwxe_fill_rx_ring(sc); bus_dmamap_sync(sc->sc_dmat, DWXE_DMA_MAP(sc->sc_rxring), 0, DWXE_DMA_LEN(sc->sc_rxring), BUS_DMASYNC_PREREAD |
BUS_DMASYNC_PREWRITE); - - if_input(ifp, &ml); } void Index: dev/fdt/if_fec.c =================================================================== RCS file: /cvs/src/sys/dev/fdt/if_fec.c,v retrieving revision 1.8 diff -u -p -r1.8 if_fec.c --- dev/fdt/if_fec.c 6 Feb 2019 22:59:06 -0000 1.8 +++ dev/fdt/if_fec.c 19 Jun 2020 03:57:17 -0000 @@ -1123,6 +1123,9 @@ fec_rx_proc(struct fec_softc *sc) sc->sc_rx_cons++; } + if (ifiq_input(&ifp->if_rcv, &ml)) + if_rxr_livelocked(&sc->sc_rx_ring); + fec_fill_rx_ring(sc); bus_dmamap_sync(sc->sc_dmat, ENET_DMA_MAP(sc->sc_rxring), 0, @@ -1131,8 +1134,6 @@ fec_rx_proc(struct fec_softc *sc) /* rx descriptors are ready */ HWRITE4(sc, ENET_RDAR, ENET_RDAR_RDAR); - - if_input(ifp, &ml); } void Index: dev/fdt/if_mvneta.c =================================================================== RCS file: /cvs/src/sys/dev/fdt/if_mvneta.c,v retrieving revision 1.10 diff -u -p -r1.10 if_mvneta.c --- dev/fdt/if_mvneta.c 22 May 2020 10:02:30 -0000 1.10 +++ dev/fdt/if_mvneta.c 19 Jun 2020 03:57:17 -0000 @@ -1363,9 +1363,10 @@ mvneta_rx_proc(struct mvneta_softc *sc) sc->sc_rx_cons = MVNETA_RX_RING_NEXT(idx); } - mvneta_fill_rx_ring(sc); + if (ifiq_input(&ifp->if_rcv, &ml)) + if_rxr_livelocked(&sc->sc_rx_ring); - if_input(ifp, &ml); + mvneta_fill_rx_ring(sc); } void Index: dev/ic/bcmgenet.c =================================================================== RCS file: /cvs/src/sys/dev/ic/bcmgenet.c,v retrieving revision 1.1 diff -u -p -r1.1 bcmgenet.c --- dev/ic/bcmgenet.c 14 Apr 2020 21:02:39 -0000 1.1 +++ dev/ic/bcmgenet.c 19 Jun 2020 03:57:17 -0000 @@ -729,8 +729,10 @@ genet_rxintr(struct genet_softc *sc, int sc->sc_rx.next = index; sc->sc_rx.pidx = pidx; + if (ifiq_input(&ifp->if_rcv, &ml)) + if_rxr_livelocked(&sc->sc_rx_ring); + genet_fill_rx_ring(sc, qid); - if_input(ifp, &ml); } } Index: dev/ic/gem.c =================================================================== RCS file: /cvs/src/sys/dev/ic/gem.c,v retrieving revision 1.123 diff -u -p 
-r1.123 gem.c --- dev/ic/gem.c 7 Feb 2018 22:35:14 -0000 1.123 +++ dev/ic/gem.c 19 Jun 2020 03:57:17 -0000 @@ -1020,6 +1020,9 @@ gem_rint(struct gem_softc *sc) ml_enqueue(&ml, m); } + if (ifiq_input(&ifp->if_rcv, &ml)) + if_rxr_livelocked(&sc->sc_rx_ring); + /* Update the receive pointer. */ sc->sc_rx_cons = i; gem_fill_rx_ring(sc); @@ -1027,8 +1030,6 @@ gem_rint(struct gem_softc *sc) DPRINTF(sc, ("gem_rint: done sc->sc_rx_cons %d, complete %d\n", sc->sc_rx_cons, bus_space_read_4(t, h, GEM_RX_COMPLETION))); - - if_input(ifp, &ml); return (1); } Index: dev/ic/hme.c =================================================================== RCS file: /cvs/src/sys/dev/ic/hme.c,v retrieving revision 1.81 diff -u -p -r1.81 hme.c --- dev/ic/hme.c 22 Jan 2017 10:17:38 -0000 1.81 +++ dev/ic/hme.c 19 Jun 2020 03:57:17 -0000 @@ -844,7 +844,8 @@ hme_rint(struct hme_softc *sc) ml_enqueue(&ml, m); } - if_input(ifp, &ml); + if (ifiq_input(&ifp->if_rcv, &ml)) + if_rxr_livelocked(&sc->sc_rx_ring); sc->sc_rx_cons = ri; hme_fill_rx_ring(sc); Index: dev/ic/re.c =================================================================== RCS file: /cvs/src/sys/dev/ic/re.c,v retrieving revision 1.204 diff -u -p -r1.204 re.c --- dev/ic/re.c 19 Nov 2019 06:34:10 -0000 1.204 +++ dev/ic/re.c 19 Jun 2020 03:57:18 -0000 @@ -1398,10 +1398,12 @@ re_rxeof(struct rl_softc *sc) ml_enqueue(&ml, m); } + if (ifiq_input(&ifp->if_rcv, &ml)) + if_rxr_livelocked(&sc->rl_ldata.rl_rx_ring); + sc->rl_ldata.rl_rx_considx = i; re_rx_list_fill(sc); - if_input(ifp, &ml); return (rx); } Index: dev/ic/xl.c =================================================================== RCS file: /cvs/src/sys/dev/ic/xl.c,v retrieving revision 1.132 diff -u -p -r1.132 xl.c --- dev/ic/xl.c 22 Jan 2017 10:17:38 -0000 1.132 +++ dev/ic/xl.c 19 Jun 2020 03:57:18 -0000 @@ -1213,6 +1213,9 @@ again: ml_enqueue(&ml, m); } + if (ifiq_input(&ifp->if_rcv, &ml)) + if_rxr_livelocked(&sc->xl_cdata.xl_rx_ring); + xl_fill_rx_ring(sc); /* @@ -1235,8 +1238,6 @@ 
again: xl_fill_rx_ring(sc); goto again; } - - if_input(ifp, &ml); } /* Index: dev/pci/if_bge.c =================================================================== RCS file: /cvs/src/sys/dev/pci/if_bge.c,v retrieving revision 1.389 diff -u -p -r1.389 if_bge.c --- dev/pci/if_bge.c 18 Jun 2020 17:13:31 -0000 1.389 +++ dev/pci/if_bge.c 19 Jun 2020 03:57:18 -0000 @@ -3462,6 +3462,7 @@ bge_rxeof(struct bge_softc *sc) bus_addr_t offset, toff; bus_size_t tlen; int tosync; + int livelocked; rx_cons = sc->bge_rx_saved_considx; rx_prod = sc->bge_rdata->bge_status_block.bge_idx[0].bge_rx_prod_idx; @@ -3564,16 +3565,20 @@ bge_rxeof(struct bge_softc *sc) sc->bge_rx_saved_considx = rx_cons; bge_writembx(sc, BGE_MBX_RX_CONS0_LO, sc->bge_rx_saved_considx); + + livelocked = ifiq_input(&ifp->if_rcv, &ml); if (stdcnt) { if_rxr_put(&sc->bge_std_ring, stdcnt); + if (livelocked) + if_rxr_livelocked(&sc->bge_std_ring); bge_fill_rx_ring_std(sc); } if (jumbocnt) { if_rxr_put(&sc->bge_jumbo_ring, jumbocnt); + if (livelocked) + if_rxr_livelocked(&sc->bge_jumbo_ring); bge_fill_rx_ring_jumbo(sc); } - - if_input(ifp, &ml); } void Index: dev/pci/if_bnx.c =================================================================== RCS file: /cvs/src/sys/dev/pci/if_bnx.c,v retrieving revision 1.127 diff -u -p -r1.127 if_bnx.c --- dev/pci/if_bnx.c 17 May 2020 08:27:51 -0000 1.127 +++ dev/pci/if_bnx.c 19 Jun 2020 03:57:18 -0000 @@ -4467,6 +4467,9 @@ bnx_rx_int_next_rx: BUS_SPACE_BARRIER_READ); } + if (ifiq_input(&ifp->if_rcv, &ml)) + if_rxr_livelocked(&sc->rx_ring); + /* No new packets to process. Refill the RX chain and exit. 
*/ sc->rx_cons = sw_cons; if (!bnx_fill_rx_chain(sc)) @@ -4477,8 +4480,6 @@ bnx_rx_int_next_rx: sc->rx_bd_chain_map[i], 0, sc->rx_bd_chain_map[i]->dm_mapsize, BUS_DMASYNC_PREWRITE); - - if_input(ifp, &ml); DBPRINT(sc, BNX_INFO_RECV, "%s(exit): rx_prod = 0x%04X, " "rx_cons = 0x%04X, rx_prod_bseq = 0x%08X\n", Index: dev/pci/if_bnxt.c =================================================================== RCS file: /cvs/src/sys/dev/pci/if_bnxt.c,v retrieving revision 1.24 diff -u -p -r1.24 if_bnxt.c --- dev/pci/if_bnxt.c 9 Jun 2020 07:03:12 -0000 1.24 +++ dev/pci/if_bnxt.c 19 Jun 2020 03:57:18 -0000 @@ -1345,12 +1345,15 @@ bnxt_intr(void *xsc) if_rxr_put(&sc->sc_rxr[0], rxfree); if_rxr_put(&sc->sc_rxr[1], agfree); + if (ifiq_input(&sc->sc_ac.ac_if.if_rcv, &ml)) { + if_rxr_livelocked(&sc->sc_rxr[0]); + if_rxr_livelocked(&sc->sc_rxr[1]); + } + bnxt_rx_fill(sc); if ((sc->sc_rx_cons == sc->sc_rx_prod) || (sc->sc_rx_ag_cons == sc->sc_rx_ag_prod)) timeout_add(&sc->sc_rx_refill, 0); - - if_input(&sc->sc_ac.ac_if, &ml); } if (txfree != 0) { if (ifq_is_oactive(&ifp->if_snd)) Index: dev/pci/if_bwfm_pci.c =================================================================== RCS file: /cvs/src/sys/dev/pci/if_bwfm_pci.c,v retrieving revision 1.36 diff -u -p -r1.36 if_bwfm_pci.c --- dev/pci/if_bwfm_pci.c 7 Mar 2020 09:56:46 -0000 1.36 +++ dev/pci/if_bwfm_pci.c 19 Jun 2020 03:57:18 -0000 @@ -1914,7 +1914,9 @@ bwfm_pci_intr(void *v) bwfm_pci_ring_rx(sc, &sc->sc_rx_complete, &ml); bwfm_pci_ring_rx(sc, &sc->sc_tx_complete, &ml); bwfm_pci_ring_rx(sc, &sc->sc_ctrl_complete, &ml); - if_input(ifp, &ml); + + if (ifiq_input(&ifp->if_rcv, &ml)) + if_rxr_livelocked(&sc->sc_rxbuf_ring); } #ifdef BWFM_DEBUG Index: dev/pci/if_em.c =================================================================== RCS file: /cvs/src/sys/dev/pci/if_em.c,v retrieving revision 1.353 diff -u -p -r1.353 if_em.c --- dev/pci/if_em.c 9 Jun 2020 07:36:10 -0000 1.353 +++ dev/pci/if_em.c 19 Jun 2020 03:57:18 -0000 @@ -3008,7 
+3008,8 @@ em_rxeof(struct em_queue *que) que->rx.sc_rx_desc_tail = i; - if_input(ifp, &ml); + if (ifiq_input(&ifp->if_rcv, &ml)) + if_rxr_livelocked(&que->rx.sc_rx_ring); return (rv); } Index: dev/pci/if_nep.c =================================================================== RCS file: /cvs/src/sys/dev/pci/if_nep.c,v retrieving revision 1.31 diff -u -p -r1.31 if_nep.c --- dev/pci/if_nep.c 9 Nov 2018 14:14:31 -0000 1.31 +++ dev/pci/if_nep.c 19 Jun 2020 03:57:18 -0000 @@ -1049,7 +1049,8 @@ nep_rx_proc(struct nep_softc *sc) bus_dmamap_sync(sc->sc_dmat, NEP_DMA_MAP(sc->sc_rcring), 0, NEP_DMA_LEN(sc->sc_rcring), BUS_DMASYNC_PREREAD); - if_input(ifp, &ml); + if (ifiq_input(&ifp->if_rcv, &ml)) + if_rxr_livelocked(&sc->sc_rx_ring); nep_fill_rx_ring(sc); Index: dev/pci/if_oce.c =================================================================== RCS file: /cvs/src/sys/dev/pci/if_oce.c,v retrieving revision 1.100 diff -u -p -r1.100 if_oce.c --- dev/pci/if_oce.c 27 Nov 2017 16:53:04 -0000 1.100 +++ dev/pci/if_oce.c 19 Jun 2020 03:57:18 -0000 @@ -1639,7 +1639,8 @@ oce_rxeof(struct oce_rq *rq, struct oce_ ml_enqueue(&ml, m); } exit: - if_input(ifp, &ml); + if (ifiq_input(&ifp->if_rcv, &ml)) + if_rxr_livelocked(&rq->rxring); } void Index: dev/pci/if_sis.c =================================================================== RCS file: /cvs/src/sys/dev/pci/if_sis.c,v retrieving revision 1.135 diff -u -p -r1.135 if_sis.c --- dev/pci/if_sis.c 22 Jan 2017 10:17:38 -0000 1.135 +++ dev/pci/if_sis.c 19 Jun 2020 03:57:18 -0000 @@ -1447,7 +1447,8 @@ sis_rxeof(struct sis_softc *sc) ml_enqueue(&ml, m); } - if_input(ifp, &ml); + if (ifiq_input(&ifp->if_rcv, &ml)) + if_rxr_livelocked(&sc->sis_cdata.sis_rx_ring); sis_fill_rx_ring(sc); } Index: dev/pci/if_sk.c =================================================================== RCS file: /cvs/src/sys/dev/pci/if_sk.c,v retrieving revision 1.189 diff -u -p -r1.189 if_sk.c --- dev/pci/if_sk.c 4 Jun 2017 04:29:23 -0000 1.189 +++ dev/pci/if_sk.c 19 
Jun 2020 03:57:18 -0000 @@ -1637,9 +1637,11 @@ sk_rxeof(struct sk_if_softc *sc_if) } sc_if->sk_cdata.sk_rx_cons = cur; + if (ifiq_input(&ifp->if_rcv, &ml)) + if_rxr_livelocked(rxr); + sk_fill_rx_ring(sc_if); - if_input(ifp, &ml); } void Index: dev/pci/if_vic.c =================================================================== RCS file: /cvs/src/sys/dev/pci/if_vic.c,v retrieving revision 1.99 diff -u -p -r1.99 if_vic.c --- dev/pci/if_vic.c 9 Nov 2019 03:53:44 -0000 1.99 +++ dev/pci/if_vic.c 19 Jun 2020 03:57:18 -0000 @@ -867,7 +867,9 @@ vic_rx_proc(struct vic_softc *sc, int q) VIC_INC(sc->sc_data->vd_rx[q].nextidx, sc->sc_nrxbuf); } - if_input(ifp, &ml); + if (ifiq_input(&ifp->if_rcv, &ml)) + if_rxr_livelocked(&sc->sc_rxq[q].ring); + vic_rx_fill(sc, q); bus_dmamap_sync(sc->sc_dmat, sc->sc_dma_map, 0, sc->sc_dma_size, Index: dev/pci/if_vr.c =================================================================== RCS file: /cvs/src/sys/dev/pci/if_vr.c,v retrieving revision 1.153 diff -u -p -r1.153 if_vr.c --- dev/pci/if_vr.c 22 Jan 2017 10:17:38 -0000 1.153 +++ dev/pci/if_vr.c 19 Jun 2020 03:57:18 -0000 @@ -933,13 +933,14 @@ vr_rxeof(struct vr_softc *sc) ml_enqueue(&ml, m); } + if (ifiq_input(&ifp->if_rcv, &ml)) + if_rxr_livelocked(&sc->sc_rxring); + vr_fill_rx_ring(sc); bus_dmamap_sync(sc->sc_dmat, sc->sc_listmap.vrm_map, 0, sc->sc_listmap.vrm_map->dm_mapsize, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - - if_input(ifp, &ml); } void Index: dev/pv/if_vio.c =================================================================== RCS file: /cvs/src/sys/dev/pv/if_vio.c,v retrieving revision 1.16 diff -u -p -r1.16 if_vio.c --- dev/pv/if_vio.c 31 Dec 2019 10:05:33 -0000 1.16 +++ dev/pv/if_vio.c 19 Jun 2020 03:57:18 -0000 @@ -1039,7 +1039,9 @@ vio_rxeof(struct vio_softc *sc) m_freem(m0); } - if_input(ifp, &ml); + if (ifiq_input(&ifp->if_rcv, &ml)) + if_rxr_livelocked(&sc->sc_rx_ring); + return r; } Index: kern/kern_sysctl.c 
=================================================================== RCS file: /cvs/src/sys/kern/kern_sysctl.c,v retrieving revision 1.372 diff -u -p -r1.372 kern_sysctl.c --- kern/kern_sysctl.c 29 May 2020 01:22:53 -0000 1.372 +++ kern/kern_sysctl.c 19 Jun 2020 03:57:18 -0000 @@ -122,7 +122,6 @@ extern int nselcoll, fscale; extern struct disklist_head disklist; extern fixpt_t ccpu; extern long numvnodes; -extern u_int net_livelocks; #if NAUDIO > 0 extern int audio_record_enable; #endif @@ -644,7 +643,7 @@ kern_sysctl(int *name, u_int namelen, vo dev = NODEV; return sysctl_rdstruct(oldp, oldlenp, newp, &dev, sizeof(dev)); case KERN_NETLIVELOCKS: - return (sysctl_rdint(oldp, oldlenp, newp, net_livelocks)); + return (sysctl_rdint(oldp, oldlenp, newp, 0)); case KERN_POOL_DEBUG: { int old_pool_debug = pool_debug; Index: net/if.c =================================================================== RCS file: /cvs/src/sys/net/if.c,v retrieving revision 1.607 diff -u -p -r1.607 if.c --- net/if.c 17 Jun 2020 06:45:22 -0000 1.607 +++ net/if.c 19 Jun 2020 03:57:18 -0000 @@ -230,9 +230,6 @@ int if_cloners_count; struct mutex if_hooks_mtx = MUTEX_INITIALIZER(IPL_NONE); void if_hooks_run(struct task_list *); -struct timeout net_tick_to; -void net_tick(void *); -int net_livelocked(void); int ifq_congestion; int netisr; @@ -262,15 +259,11 @@ ifinit(void) */ if_idxmap_init(8); - timeout_set(&net_tick_to, net_tick, &net_tick_to); - for (i = 0; i < NET_TASKQ; i++) { nettqmp[i] = taskq_create("softnet", 1, IPL_NET, TASKQ_MPSAFE); if (nettqmp[i] == NULL) panic("unable to create network taskq %d", i); } - - net_tick(&net_tick_to); } static struct if_idxmap if_idxmap = { @@ -3179,30 +3260,6 @@ if_addrhooks_run(struct ifnet *ifp) if_hooks_run(&ifp->if_addrhooks); } -int net_ticks; -u_int net_livelocks; - -void -net_tick(void *null) -{ - extern int ticks; - - if (ticks - net_ticks > 1) - net_livelocks++; - - net_ticks = ticks; - - timeout_add(&net_tick_to, 1); -} - -int -net_livelocked(void) 
-{ - extern int ticks; - - return (ticks - net_ticks > 1); -} - void if_rxr_init(struct if_rxring *rxr, u_int lwm, u_int hwm) { @@ -3220,12 +3277,7 @@ if_rxr_adjust_cwm(struct if_rxring *rxr) { extern int ticks; - if (net_livelocked()) { - if (rxr->rxr_cwm > rxr->rxr_lwm) - rxr->rxr_cwm--; - else - return; - } else if (rxr->rxr_alive >= rxr->rxr_lwm) + if (rxr->rxr_alive >= rxr->rxr_lwm) return; else if (rxr->rxr_cwm < rxr->rxr_hwm) rxr->rxr_cwm++;