Re: ifq serialisation, was Re: taskctx and revisiting if_start serialisation

2015-12-09 Thread Mark Kettenis
> Date: Tue, 8 Dec 2015 21:58:49 +1000
> From: David Gwynne 
> 
> On Sun, Dec 06, 2015 at 02:00:16PM +1000, David Gwynne wrote:
> > the current code for serialising if_start calls for mpsafe nics does what 
> > it says.
> 
> as per mpi@s suggestion, this makes the ifq code responsible for
> the task serialisation.
> 
> all the machinery is there, but it provides a minimal step toward
> coordination of the oactive flag. the only concession is if a driver
> wants to clear oactive and run its start routine again it can call
> ifq_restart() to have it serialised with the stacks calls to the
> start routine.
> 
> if there's a desire for a driver to run all of its txeof serialised
> with its start routine, we'll figure out the best way to provide
> that when the problem really occurs.
> 
> if_start_barrier is now called ifq_barrier too.i

Bleah.  I just spent two days in bed with a flu.  I still need to
write down my conclusions from the experiments I did with forwarding
during n2k15.  But one of the things I noticed that the taskctx
serialization reduced the forwarding performance a by something like
10%.  I'd like to see what happens with this version of the diff.  I
think I can recreate setup that I used during n2k15 with em(4) instead
of ix(4) at home.  But it won't happen until after this weekend.

> Index: dev/pci/if_myx.c
> ===
> RCS file: /cvs/src/sys/dev/pci/if_myx.c,v
> retrieving revision 1.90
> diff -u -p -r1.90 if_myx.c
> --- dev/pci/if_myx.c  3 Dec 2015 12:45:56 -   1.90
> +++ dev/pci/if_myx.c  8 Dec 2015 11:31:09 -
> @@ -1208,7 +1208,7 @@ myx_up(struct myx_softc *sc)
>   ifq_clr_oactive(>if_snd);
>   SET(ifp->if_flags, IFF_RUNNING);
>   myx_iff(sc);
> - myx_start(ifp);
> + if_start(ifp);
>  
>   return;
>  
> @@ -1330,6 +1330,8 @@ myx_down(struct myx_softc *sc)
>   int  s;
>   int  ring;
>  
> + CLR(ifp->if_flags, IFF_RUNNING);
> +
>   bus_dmamap_sync(sc->sc_dmat, map, 0, map->dm_mapsize,
>   BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE);
>   sc->sc_linkdown = sts->ms_linkdown;
> @@ -1362,8 +1364,7 @@ myx_down(struct myx_softc *sc)
>   }
>  
>   ifq_clr_oactive(>if_snd);
> - CLR(ifp->if_flags, IFF_RUNNING);
> - if_start_barrier(ifp);
> + ifq_barrier(>if_snd);
>  
>   for (ring = 0; ring < 2; ring++) {
>   struct myx_rx_ring *mrr = >sc_rx_ring[ring];
> @@ -1702,9 +1703,8 @@ myx_txeof(struct myx_softc *sc, u_int32_
>   sc->sc_tx_ring_cons = idx;
>   sc->sc_tx_cons = cons;
>  
> - ifq_clr_oactive(>if_snd);
> - if (!ifq_empty(>if_snd))
> - if_start(ifp);
> + if (ifq_is_oactive(>if_snd))
> + ifq_restart(>if_snd);
>  }
>  
>  void
> Index: dev/pci/if_bnx.c
> ===
> RCS file: /cvs/src/sys/dev/pci/if_bnx.c,v
> retrieving revision 1.118
> diff -u -p -r1.118 if_bnx.c
> --- dev/pci/if_bnx.c  5 Dec 2015 16:23:37 -   1.118
> +++ dev/pci/if_bnx.c  8 Dec 2015 11:31:09 -
> @@ -871,6 +871,7 @@ bnx_attachhook(void *xsc)
>   ifp = >arpcom.ac_if;
>   ifp->if_softc = sc;
>   ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
> + ifp->if_xflags = IFXF_MPSAFE;
>   ifp->if_ioctl = bnx_ioctl;
>   ifp->if_start = bnx_start;
>   ifp->if_watchdog = bnx_watchdog;
> @@ -4573,20 +4574,14 @@ bnx_tx_intr(struct bnx_softc *sc)
>  
>   used = atomic_sub_int_nv(>used_tx_bd, freed);
>  
> + sc->tx_cons = sw_tx_cons;
> +
>   /* Clear the TX timeout timer. */
>   if (used == 0)
>   ifp->if_timer = 0;
>  
> - /* Clear the tx hardware queue full flag. */
> - if (used < sc->max_tx_bd) {
> - DBRUNIF(ifq_is_oactive(>if_snd),
> - printf("%s: Open TX chain! %d/%d (used/total)\n",
> - sc->bnx_dev.dv_xname, used,
> - sc->max_tx_bd));
> - ifq_clr_oactive(>if_snd);
> - }
> -
> - sc->tx_cons = sw_tx_cons;
> + if (ifq_is_oactive(>if_snd))
> + ifq_restart(>if_snd);
>  }
>  
>  
> //
> @@ -4880,10 +4875,8 @@ bnx_start(struct ifnet *ifp)
>   int used;
>   u_int16_t   tx_prod, tx_chain_prod;
>  
> - /* If there's no link or the transmit queue is empty then just exit. */
> - if (!sc->bnx_link || IFQ_IS_EMPTY(>if_snd)) {
> - DBPRINT(sc, BNX_INFO_SEND,
> - "%s(): No link or transmit queue empty.\n", __FUNCTION__);
> + if (!sc->bnx_link) {
> + ifq_purge(>if_snd);
>   goto bnx_start_exit;
>   }
>  
> Index: net/if.c
> ===
> RCS file: /cvs/src/sys/net/if.c,v
> retrieving revision 1.423
> diff -u -p 

ifq serialisation, was Re: taskctx and revisiting if_start serialisation

2015-12-08 Thread David Gwynne
On Sun, Dec 06, 2015 at 02:00:16PM +1000, David Gwynne wrote:
> the current code for serialising if_start calls for mpsafe nics does what it 
> says.

as per mpi@s suggestion, this makes the ifq code responsible for
the task serialisation.

all the machinery is there, but it provides a minimal step toward
coordination of the oactive flag. the only concession is if a driver
wants to clear oactive and run its start routine again it can call
ifq_restart() to have it serialised with the stacks calls to the
start routine.

if there's a desire for a driver to run all of its txeof serialised
with its start routine, we'll figure out the best way to provide
that when the problem really occurs.

if_start_barrier is now called ifq_barrier too.i

Index: dev/pci/if_myx.c
===
RCS file: /cvs/src/sys/dev/pci/if_myx.c,v
retrieving revision 1.90
diff -u -p -r1.90 if_myx.c
--- dev/pci/if_myx.c3 Dec 2015 12:45:56 -   1.90
+++ dev/pci/if_myx.c8 Dec 2015 11:31:09 -
@@ -1208,7 +1208,7 @@ myx_up(struct myx_softc *sc)
ifq_clr_oactive(>if_snd);
SET(ifp->if_flags, IFF_RUNNING);
myx_iff(sc);
-   myx_start(ifp);
+   if_start(ifp);
 
return;
 
@@ -1330,6 +1330,8 @@ myx_down(struct myx_softc *sc)
int  s;
int  ring;
 
+   CLR(ifp->if_flags, IFF_RUNNING);
+
bus_dmamap_sync(sc->sc_dmat, map, 0, map->dm_mapsize,
BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE);
sc->sc_linkdown = sts->ms_linkdown;
@@ -1362,8 +1364,7 @@ myx_down(struct myx_softc *sc)
}
 
ifq_clr_oactive(>if_snd);
-   CLR(ifp->if_flags, IFF_RUNNING);
-   if_start_barrier(ifp);
+   ifq_barrier(>if_snd);
 
for (ring = 0; ring < 2; ring++) {
struct myx_rx_ring *mrr = >sc_rx_ring[ring];
@@ -1702,9 +1703,8 @@ myx_txeof(struct myx_softc *sc, u_int32_
sc->sc_tx_ring_cons = idx;
sc->sc_tx_cons = cons;
 
-   ifq_clr_oactive(>if_snd);
-   if (!ifq_empty(>if_snd))
-   if_start(ifp);
+   if (ifq_is_oactive(>if_snd))
+   ifq_restart(>if_snd);
 }
 
 void
Index: dev/pci/if_bnx.c
===
RCS file: /cvs/src/sys/dev/pci/if_bnx.c,v
retrieving revision 1.118
diff -u -p -r1.118 if_bnx.c
--- dev/pci/if_bnx.c5 Dec 2015 16:23:37 -   1.118
+++ dev/pci/if_bnx.c8 Dec 2015 11:31:09 -
@@ -871,6 +871,7 @@ bnx_attachhook(void *xsc)
ifp = >arpcom.ac_if;
ifp->if_softc = sc;
ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
+   ifp->if_xflags = IFXF_MPSAFE;
ifp->if_ioctl = bnx_ioctl;
ifp->if_start = bnx_start;
ifp->if_watchdog = bnx_watchdog;
@@ -4573,20 +4574,14 @@ bnx_tx_intr(struct bnx_softc *sc)
 
used = atomic_sub_int_nv(>used_tx_bd, freed);
 
+   sc->tx_cons = sw_tx_cons;
+
/* Clear the TX timeout timer. */
if (used == 0)
ifp->if_timer = 0;
 
-   /* Clear the tx hardware queue full flag. */
-   if (used < sc->max_tx_bd) {
-   DBRUNIF(ifq_is_oactive(>if_snd),
-   printf("%s: Open TX chain! %d/%d (used/total)\n",
-   sc->bnx_dev.dv_xname, used,
-   sc->max_tx_bd));
-   ifq_clr_oactive(>if_snd);
-   }
-
-   sc->tx_cons = sw_tx_cons;
+   if (ifq_is_oactive(>if_snd))
+   ifq_restart(>if_snd);
 }
 
 //
@@ -4880,10 +4875,8 @@ bnx_start(struct ifnet *ifp)
int used;
u_int16_t   tx_prod, tx_chain_prod;
 
-   /* If there's no link or the transmit queue is empty then just exit. */
-   if (!sc->bnx_link || IFQ_IS_EMPTY(>if_snd)) {
-   DBPRINT(sc, BNX_INFO_SEND,
-   "%s(): No link or transmit queue empty.\n", __FUNCTION__);
+   if (!sc->bnx_link) {
+   ifq_purge(>if_snd);
goto bnx_start_exit;
}
 
Index: net/if.c
===
RCS file: /cvs/src/sys/net/if.c,v
retrieving revision 1.423
diff -u -p -r1.423 if.c
--- net/if.c8 Dec 2015 10:06:12 -   1.423
+++ net/if.c8 Dec 2015 11:31:09 -
@@ -1,4 +1,4 @@
-/* $OpenBSD: if.c,v 1.423 2015/12/08 10:06:12 dlg Exp $*/
+/* $OpenBSD: if.c,v 1.422 2015/12/05 10:07:55 tedu Exp $   */
 /* $NetBSD: if.c,v 1.35 1996/05/07 05:26:04 thorpej Exp $  */
 
 /*
@@ -153,7 +153,6 @@ voidif_input_process(void *);
 void   ifa_print_all(void);
 #endif
 
-void   if_start_mpsafe(struct ifnet *ifp);
 void   if_start_locked(struct ifnet *ifp);
 
 /*
@@ -512,7 +511,7 @@ if_attach_common(struct ifnet *ifp)
TAILQ_INIT(>if_addrlist);
TAILQ_INIT(>if_maddrlist);
 
-   ifq_init(>if_snd);
+