Re: ifq serialisation, was Re: taskctx and revisiting if_start serialisation
> Date: Tue, 8 Dec 2015 21:58:49 +1000 > From: David Gwynne> > On Sun, Dec 06, 2015 at 02:00:16PM +1000, David Gwynne wrote: > > the current code for serialising if_start calls for mpsafe nics does what > > it says. > > as per mpi@s suggestion, this makes the ifq code responsible for > the task serialisation. > > all the machinery is there, but it provides a minimal step toward > coordination of the oactive flag. the only concession is if a driver > wants to clear oactive and run its start routine again it can call > ifq_restart() to have it serialised with the stacks calls to the > start routine. > > if there's a desire for a driver to run all of its txeof serialised > with its start routine, we'll figure out the best way to provide > that when the problem really occurs. > > if_start_barrier is now called ifq_barrier too.i Bleah. I just spent two days in bed with a flu. I still need to write down my conclusions from the experiments I did with forwarding during n2k15. But one of the things I noticed that the taskctx serialization reduced the forwarding performance a by something like 10%. I'd like to see what happens with this version of the diff. I think I can recreate setup that I used during n2k15 with em(4) instead of ix(4) at home. But it won't happen until after this weekend. > Index: dev/pci/if_myx.c > === > RCS file: /cvs/src/sys/dev/pci/if_myx.c,v > retrieving revision 1.90 > diff -u -p -r1.90 if_myx.c > --- dev/pci/if_myx.c 3 Dec 2015 12:45:56 - 1.90 > +++ dev/pci/if_myx.c 8 Dec 2015 11:31:09 - > @@ -1208,7 +1208,7 @@ myx_up(struct myx_softc *sc) > ifq_clr_oactive(>if_snd); > SET(ifp->if_flags, IFF_RUNNING); > myx_iff(sc); > - myx_start(ifp); > + if_start(ifp); > > return; > > @@ -1330,6 +1330,8 @@ myx_down(struct myx_softc *sc) > int s; > int ring; > > + CLR(ifp->if_flags, IFF_RUNNING); > + > bus_dmamap_sync(sc->sc_dmat, map, 0, map->dm_mapsize, > BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE); > sc->sc_linkdown = sts->ms_linkdown; > @@ -1362,8 +1364,7 @@ myx_down(struct myx_softc *sc) > } > > ifq_clr_oactive(>if_snd); > - CLR(ifp->if_flags, IFF_RUNNING); > - if_start_barrier(ifp); > + ifq_barrier(>if_snd); > > for (ring = 0; ring < 2; ring++) { > struct myx_rx_ring *mrr = >sc_rx_ring[ring]; > @@ -1702,9 +1703,8 @@ myx_txeof(struct myx_softc *sc, u_int32_ > sc->sc_tx_ring_cons = idx; > sc->sc_tx_cons = cons; > > - ifq_clr_oactive(>if_snd); > - if (!ifq_empty(>if_snd)) > - if_start(ifp); > + if (ifq_is_oactive(>if_snd)) > + ifq_restart(>if_snd); > } > > void > Index: dev/pci/if_bnx.c > === > RCS file: /cvs/src/sys/dev/pci/if_bnx.c,v > retrieving revision 1.118 > diff -u -p -r1.118 if_bnx.c > --- dev/pci/if_bnx.c 5 Dec 2015 16:23:37 - 1.118 > +++ dev/pci/if_bnx.c 8 Dec 2015 11:31:09 - > @@ -871,6 +871,7 @@ bnx_attachhook(void *xsc) > ifp = >arpcom.ac_if; > ifp->if_softc = sc; > ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; > + ifp->if_xflags = IFXF_MPSAFE; > ifp->if_ioctl = bnx_ioctl; > ifp->if_start = bnx_start; > ifp->if_watchdog = bnx_watchdog; > @@ -4573,20 +4574,14 @@ bnx_tx_intr(struct bnx_softc *sc) > > used = atomic_sub_int_nv(>used_tx_bd, freed); > > + sc->tx_cons = sw_tx_cons; > + > /* Clear the TX timeout timer. */ > if (used == 0) > ifp->if_timer = 0; > > - /* Clear the tx hardware queue full flag. */ > - if (used < sc->max_tx_bd) { > - DBRUNIF(ifq_is_oactive(>if_snd), > - printf("%s: Open TX chain! %d/%d (used/total)\n", > - sc->bnx_dev.dv_xname, used, > - sc->max_tx_bd)); > - ifq_clr_oactive(>if_snd); > - } > - > - sc->tx_cons = sw_tx_cons; > + if (ifq_is_oactive(>if_snd)) > + ifq_restart(>if_snd); > } > > > // > @@ -4880,10 +4875,8 @@ bnx_start(struct ifnet *ifp) > int used; > u_int16_t tx_prod, tx_chain_prod; > > - /* If there's no link or the transmit queue is empty then just exit. */ > - if (!sc->bnx_link || IFQ_IS_EMPTY(>if_snd)) { > - DBPRINT(sc, BNX_INFO_SEND, > - "%s(): No link or transmit queue empty.\n", __FUNCTION__); > + if (!sc->bnx_link) { > + ifq_purge(>if_snd); > goto bnx_start_exit; > } > > Index: net/if.c > === > RCS file: /cvs/src/sys/net/if.c,v > retrieving revision 1.423 > diff -u -p
ifq serialisation, was Re: taskctx and revisiting if_start serialisation
On Sun, Dec 06, 2015 at 02:00:16PM +1000, David Gwynne wrote: > the current code for serialising if_start calls for mpsafe nics does what it > says. as per mpi@s suggestion, this makes the ifq code responsible for the task serialisation. all the machinery is there, but it provides a minimal step toward coordination of the oactive flag. the only concession is if a driver wants to clear oactive and run its start routine again it can call ifq_restart() to have it serialised with the stacks calls to the start routine. if there's a desire for a driver to run all of its txeof serialised with its start routine, we'll figure out the best way to provide that when the problem really occurs. if_start_barrier is now called ifq_barrier too.i Index: dev/pci/if_myx.c === RCS file: /cvs/src/sys/dev/pci/if_myx.c,v retrieving revision 1.90 diff -u -p -r1.90 if_myx.c --- dev/pci/if_myx.c3 Dec 2015 12:45:56 - 1.90 +++ dev/pci/if_myx.c8 Dec 2015 11:31:09 - @@ -1208,7 +1208,7 @@ myx_up(struct myx_softc *sc) ifq_clr_oactive(>if_snd); SET(ifp->if_flags, IFF_RUNNING); myx_iff(sc); - myx_start(ifp); + if_start(ifp); return; @@ -1330,6 +1330,8 @@ myx_down(struct myx_softc *sc) int s; int ring; + CLR(ifp->if_flags, IFF_RUNNING); + bus_dmamap_sync(sc->sc_dmat, map, 0, map->dm_mapsize, BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE); sc->sc_linkdown = sts->ms_linkdown; @@ -1362,8 +1364,7 @@ myx_down(struct myx_softc *sc) } ifq_clr_oactive(>if_snd); - CLR(ifp->if_flags, IFF_RUNNING); - if_start_barrier(ifp); + ifq_barrier(>if_snd); for (ring = 0; ring < 2; ring++) { struct myx_rx_ring *mrr = >sc_rx_ring[ring]; @@ -1702,9 +1703,8 @@ myx_txeof(struct myx_softc *sc, u_int32_ sc->sc_tx_ring_cons = idx; sc->sc_tx_cons = cons; - ifq_clr_oactive(>if_snd); - if (!ifq_empty(>if_snd)) - if_start(ifp); + if (ifq_is_oactive(>if_snd)) + ifq_restart(>if_snd); } void Index: dev/pci/if_bnx.c === RCS file: /cvs/src/sys/dev/pci/if_bnx.c,v retrieving revision 1.118 diff -u -p -r1.118 if_bnx.c --- dev/pci/if_bnx.c5 Dec 2015 16:23:37 - 1.118 +++ dev/pci/if_bnx.c8 Dec 2015 11:31:09 - @@ -871,6 +871,7 @@ bnx_attachhook(void *xsc) ifp = >arpcom.ac_if; ifp->if_softc = sc; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; + ifp->if_xflags = IFXF_MPSAFE; ifp->if_ioctl = bnx_ioctl; ifp->if_start = bnx_start; ifp->if_watchdog = bnx_watchdog; @@ -4573,20 +4574,14 @@ bnx_tx_intr(struct bnx_softc *sc) used = atomic_sub_int_nv(>used_tx_bd, freed); + sc->tx_cons = sw_tx_cons; + /* Clear the TX timeout timer. */ if (used == 0) ifp->if_timer = 0; - /* Clear the tx hardware queue full flag. */ - if (used < sc->max_tx_bd) { - DBRUNIF(ifq_is_oactive(>if_snd), - printf("%s: Open TX chain! %d/%d (used/total)\n", - sc->bnx_dev.dv_xname, used, - sc->max_tx_bd)); - ifq_clr_oactive(>if_snd); - } - - sc->tx_cons = sw_tx_cons; + if (ifq_is_oactive(>if_snd)) + ifq_restart(>if_snd); } // @@ -4880,10 +4875,8 @@ bnx_start(struct ifnet *ifp) int used; u_int16_t tx_prod, tx_chain_prod; - /* If there's no link or the transmit queue is empty then just exit. */ - if (!sc->bnx_link || IFQ_IS_EMPTY(>if_snd)) { - DBPRINT(sc, BNX_INFO_SEND, - "%s(): No link or transmit queue empty.\n", __FUNCTION__); + if (!sc->bnx_link) { + ifq_purge(>if_snd); goto bnx_start_exit; } Index: net/if.c === RCS file: /cvs/src/sys/net/if.c,v retrieving revision 1.423 diff -u -p -r1.423 if.c --- net/if.c8 Dec 2015 10:06:12 - 1.423 +++ net/if.c8 Dec 2015 11:31:09 - @@ -1,4 +1,4 @@ -/* $OpenBSD: if.c,v 1.423 2015/12/08 10:06:12 dlg Exp $*/ +/* $OpenBSD: if.c,v 1.422 2015/12/05 10:07:55 tedu Exp $ */ /* $NetBSD: if.c,v 1.35 1996/05/07 05:26:04 thorpej Exp $ */ /* @@ -153,7 +153,6 @@ voidif_input_process(void *); void ifa_print_all(void); #endif -void if_start_mpsafe(struct ifnet *ifp); void if_start_locked(struct ifnet *ifp); /* @@ -512,7 +511,7 @@ if_attach_common(struct ifnet *ifp) TAILQ_INIT(>if_addrlist); TAILQ_INIT(>if_maddrlist); - ifq_init(>if_snd); +