hrvoje popovski hit a problem where the kernel would panic under load.

i mistakenly called an interface's qstart routine directly from
if_enqueue rather than via the ifq serializer. this meant that txeof
routines on network cards calling ifq_restart would cause the start
routine to run concurrently, therefore causing corruption of the
ring state.

this diff fixes that.

On Mon, Jan 23, 2017 at 01:09:57PM +1000, David Gwynne wrote:
> the short explanation is that this lets interfaces allocate multiple
> ifq structures that can be mapped to their transmit rings. the
> mechanism for this is a driver calling if_attach_queues() after
> they've called if_attach().
> 
> the long version is that this has if_enqueue access an array of
> ifqueues on the interface instead of if_snd directly. the ifq is
> picked by asking the queue discipline (priq or hfsc) to map an mbuf
> to a slot in the if_ifqs array.
> 
> to notify the driver that a particular queue needs to start i've
> added a new function pointer to ifnet called if_qstart. if_qstart
> takes an ifqueue * as an argument instead of an ifnet *, thereby
> getting past the implicit behaviour that interfaces only have a
> single ring.
> 
> our drivers all have if_start routines that take ifnet pointers
> though, so there's compatibility for those where a default if_qstart
> implementation calls if_start for those drivers. in the future
> if_start will be replaced with if_qstart and we can rename it back
> to if_start. until then, there's compat.
> 
> drivers that provide their own if_qstart instead of an if_start
> function notify the stack by setting IFXF_MPSAFE. a chunk of this
> diff is changing the IFXF_MPSAFE drivers to set if_qstart instead
> of if_start. note that this is a mechanical change, it does not add
> multiple tx queues to these drivers.
> 
> most of this is straightforward except for the hfsc handling. hfsc
> needs to track all flows going over an interface, which means all
> flows have to be serialised through hfsc. the mechanism in use
> before this change was to swap the priq backend on if_snd with the
> hfsc backend. the trick with this diff is that we still do that,
> ie, we only change the first ifqueue on an interface over to hfsc.
> this works because we use the ifqops on the first ifq to map packets
> to any of them. because the hfsc map function unconditionally maps
> packets to the first ifq, all packets end up going through the one
> hfsc structure we set up. the rest of the ifqs remain set up as
> priq, but don't get used for sending packets after hfsc has been
> enabled. if we ever add another ifqops backend, this will have to
> be rethought. until then this is an elegant hack.
> 
> a consequence of this change is that the ifnet if_start function
> should not be called anymore. this isn't true at the moment because
> of things like net80211 and ppp. they both queue management packets
> onto a separate queue, but those separate queues are dequeued and
> processed in the interface's start routine. if we want to mark wifi
> and ppp drivers as mpsafe (or get rid of separate if_start and
> if_qstart routines) this will have to change.
> 
> the guts of this change are in if_enqueue and if_attach_queues.
> 
> ok?
>

Index: arch/octeon/dev/if_cnmac.c
===================================================================
RCS file: /cvs/src/sys/arch/octeon/dev/if_cnmac.c,v
retrieving revision 1.61
diff -u -p -r1.61 if_cnmac.c
--- arch/octeon/dev/if_cnmac.c  5 Nov 2016 05:14:18 -0000       1.61
+++ arch/octeon/dev/if_cnmac.c  23 Jan 2017 06:32:59 -0000
@@ -138,7 +138,7 @@ int octeon_eth_ioctl(struct ifnet *, u_l
 void   octeon_eth_watchdog(struct ifnet *);
 int    octeon_eth_init(struct ifnet *);
 int    octeon_eth_stop(struct ifnet *, int);
-void   octeon_eth_start(struct ifnet *);
+void   octeon_eth_start(struct ifqueue *);
 
 int    octeon_eth_send_cmd(struct octeon_eth_softc *, uint64_t, uint64_t);
 uint64_t octeon_eth_send_makecmd_w1(int, paddr_t);
@@ -303,7 +303,7 @@ octeon_eth_attach(struct device *parent,
        ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
        ifp->if_xflags = IFXF_MPSAFE;
        ifp->if_ioctl = octeon_eth_ioctl;
-       ifp->if_start = octeon_eth_start;
+       ifp->if_qstart = octeon_eth_start;
        ifp->if_watchdog = octeon_eth_watchdog;
        ifp->if_hardmtu = OCTEON_ETH_MAX_MTU;
        IFQ_SET_MAXLEN(&ifp->if_snd, max(GATHER_QUEUE_SIZE, IFQ_MAXLEN));
@@ -704,8 +704,6 @@ octeon_eth_ioctl(struct ifnet *ifp, u_lo
                error = 0;
        }
 
-       if_start(ifp);
-
        splx(s);
        return (error);
 }
@@ -923,13 +921,14 @@ done:
 }
 
 void
-octeon_eth_start(struct ifnet *ifp)
+octeon_eth_start(struct ifqueue *ifq)
 {
+       struct ifnet *ifp = ifq->ifq_if;
        struct octeon_eth_softc *sc = ifp->if_softc;
        struct mbuf *m;
 
        if (__predict_false(!cn30xxgmx_link_status(sc->sc_gmx_port))) {
-               ifq_purge(&ifp->if_snd);
+               ifq_purge(ifq);
                return;
        }
 
@@ -948,12 +947,12 @@ octeon_eth_start(struct ifnet *ifp)
                 * and bail out.
                 */
                if (octeon_eth_send_queue_is_full(sc)) {
-                       ifq_set_oactive(&ifp->if_snd);
+                       ifq_set_oactive(ifq);
                        timeout_add(&sc->sc_tick_free_ch, 1);
                        return;
                }
 
-               m = ifq_dequeue(&ifp->if_snd);
+               m = ifq_dequeue(ifq);
                if (m == NULL)
                        return;
 
Index: dev/ic/re.c
===================================================================
RCS file: /cvs/src/sys/dev/ic/re.c,v
retrieving revision 1.200
diff -u -p -r1.200 re.c
--- dev/ic/re.c 22 Jan 2017 10:17:38 -0000      1.200
+++ dev/ic/re.c 23 Jan 2017 06:32:59 -0000
@@ -161,7 +161,7 @@ int re_tx_list_init(struct rl_softc *);
 int    re_rxeof(struct rl_softc *);
 int    re_txeof(struct rl_softc *);
 void   re_tick(void *);
-void   re_start(struct ifnet *);
+void   re_start(struct ifqueue *);
 void   re_txstart(void *);
 int    re_ioctl(struct ifnet *, u_long, caddr_t);
 void   re_watchdog(struct ifnet *);
@@ -1005,7 +1005,7 @@ re_attach(struct rl_softc *sc, const cha
        ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
        ifp->if_xflags = IFXF_MPSAFE;
        ifp->if_ioctl = re_ioctl;
-       ifp->if_start = re_start;
+       ifp->if_qstart = re_start;
        ifp->if_watchdog = re_watchdog;
        ifp->if_hardmtu = sc->rl_max_mtu;
        IFQ_SET_MAXLEN(&ifp->if_snd, sc->rl_ldata.rl_tx_desc_cnt);
@@ -1776,8 +1776,9 @@ re_txstart(void *xsc)
  */
 
 void
-re_start(struct ifnet *ifp)
+re_start(struct ifqueue *ifq)
 {
+       struct ifnet    *ifp = ifq->ifq_if;
        struct rl_softc *sc = ifp->if_softc;
        struct mbuf     *m;
        unsigned int    idx;
@@ -1785,7 +1786,7 @@ re_start(struct ifnet *ifp)
        int             post = 0;
 
        if (!ISSET(sc->rl_flags, RL_FLAG_LINK)) {
-               IFQ_PURGE(&ifp->if_snd);
+               ifq_purge(ifq);
                return;
        }
 
@@ -1797,11 +1798,11 @@ re_start(struct ifnet *ifp)
 
        for (;;) {
                if (sc->rl_ldata.rl_tx_ndescs >= free + 2) {
-                       ifq_set_oactive(&ifp->if_snd);
+                       ifq_set_oactive(ifq);
                        break;
                }
 
-               m = ifq_dequeue(&ifp->if_snd);
+               m = ifq_dequeue(ifq);
                if (m == NULL)
                        break;
 
@@ -1831,7 +1832,7 @@ re_start(struct ifnet *ifp)
 
        ifp->if_timer = 5;
        sc->rl_ldata.rl_txq_prodidx = idx;
-       ifq_serialize(&ifp->if_snd, &sc->rl_start);
+       ifq_serialize(ifq, &sc->rl_start);
 }
 
 int
Index: dev/pci/if_bge.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/if_bge.c,v
retrieving revision 1.383
diff -u -p -r1.383 if_bge.c
--- dev/pci/if_bge.c    22 Jan 2017 10:17:38 -0000      1.383
+++ dev/pci/if_bge.c    23 Jan 2017 06:32:59 -0000
@@ -142,7 +142,7 @@ int bge_encap(struct bge_softc *, struct
 int bge_compact_dma_runt(struct mbuf *);
 
 int bge_intr(void *);
-void bge_start(struct ifnet *);
+void bge_start(struct ifqueue *);
 int bge_ioctl(struct ifnet *, u_long, caddr_t);
 int bge_rxrinfo(struct bge_softc *, struct if_rxrinfo *);
 void bge_init(void *);
@@ -2996,7 +2996,7 @@ bge_attach(struct device *parent, struct
        ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
        ifp->if_xflags = IFXF_MPSAFE;
        ifp->if_ioctl = bge_ioctl;
-       ifp->if_start = bge_start;
+       ifp->if_qstart = bge_start;
        ifp->if_watchdog = bge_watchdog;
        IFQ_SET_MAXLEN(&ifp->if_snd, BGE_TX_RING_CNT - 1);
 
@@ -4116,14 +4116,15 @@ fail_unload:
  * to the mbuf data regions directly in the transmit descriptors.
  */
 void
-bge_start(struct ifnet *ifp)
+bge_start(struct ifqueue *ifq)
 {
+       struct ifnet *ifp = ifq->ifq_if;
        struct bge_softc *sc = ifp->if_softc;
        struct mbuf *m;
        int txinc;
 
        if (!BGE_STS_BIT(sc, BGE_STS_LINK)) {
-               IFQ_PURGE(&ifp->if_snd);
+               ifq_purge(ifq);
                return;
        }
 
@@ -4132,11 +4133,11 @@ bge_start(struct ifnet *ifp)
                /* Check if we have enough free send BDs. */
                if (sc->bge_txcnt + txinc + BGE_NTXSEG + 16 >=
                    BGE_TX_RING_CNT) {
-                       ifq_set_oactive(&ifp->if_snd);
+                       ifq_set_oactive(ifq);
                        break;
                }
 
-               IFQ_DEQUEUE(&ifp->if_snd, m);
+               m = ifq_dequeue(ifq);
                if (m == NULL)
                        break;
 
Index: dev/pci/if_bnx.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/if_bnx.c,v
retrieving revision 1.123
diff -u -p -r1.123 if_bnx.c
--- dev/pci/if_bnx.c    22 Jan 2017 10:17:38 -0000      1.123
+++ dev/pci/if_bnx.c    23 Jan 2017 06:32:59 -0000
@@ -366,7 +366,7 @@ void        bnx_free_tx_chain(struct bnx_softc 
 void   bnx_rxrefill(void *);
 
 int    bnx_tx_encap(struct bnx_softc *, struct mbuf *, int *);
-void   bnx_start(struct ifnet *);
+void   bnx_start(struct ifqueue *);
 int    bnx_ioctl(struct ifnet *, u_long, caddr_t);
 void   bnx_watchdog(struct ifnet *);
 int    bnx_ifmedia_upd(struct ifnet *);
@@ -873,7 +873,7 @@ bnx_attachhook(struct device *self)
        ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
        ifp->if_xflags = IFXF_MPSAFE;
        ifp->if_ioctl = bnx_ioctl;
-       ifp->if_start = bnx_start;
+       ifp->if_qstart = bnx_start;
        ifp->if_watchdog = bnx_watchdog;
        IFQ_SET_MAXLEN(&ifp->if_snd, USABLE_TX_BD - 1);
        bcopy(sc->eaddr, sc->arpcom.ac_enaddr, ETHER_ADDR_LEN);
@@ -4865,15 +4865,16 @@ bnx_tx_encap(struct bnx_softc *sc, struc
 /*   Nothing.                                                               */
 /****************************************************************************/
 void
-bnx_start(struct ifnet *ifp)
+bnx_start(struct ifqueue *ifq)
 {
+       struct ifnet            *ifp = ifq->ifq_if;
        struct bnx_softc        *sc = ifp->if_softc;
        struct mbuf             *m_head = NULL;
        int                     used;
        u_int16_t               tx_prod, tx_chain_prod;
 
        if (!sc->bnx_link) {
-               ifq_purge(&ifp->if_snd);
+               ifq_purge(ifq);
                goto bnx_start_exit;
        }
 
@@ -4895,11 +4896,11 @@ bnx_start(struct ifnet *ifp)
                        DBPRINT(sc, BNX_INFO_SEND, "TX chain is closed for "
                            "business! Total tx_bd used = %d\n",
                            sc->used_tx_bd + used);
-                       ifq_set_oactive(&ifp->if_snd);
+                       ifq_set_oactive(ifq);
                        break;
                }
 
-               IFQ_DEQUEUE(&ifp->if_snd, m_head);
+               m_head = ifq_dequeue(ifq);
                if (m_head == NULL)
                        break;
 
@@ -5149,11 +5150,8 @@ bnx_intr(void *xsc)
 
                /* Start moving packets again */
                if (ifp->if_flags & IFF_RUNNING &&
-                   !IFQ_IS_EMPTY(&ifp->if_snd)) {
-                       KERNEL_LOCK();
-                       bnx_start(ifp);
-                       KERNEL_UNLOCK();
-               }
+                   !IFQ_IS_EMPTY(&ifp->if_snd))
+                       ifq_start(&ifp->if_snd);
        }
 
 out:
@@ -5486,8 +5484,8 @@ bnx_tick(void *xsc)
            IFM_SUBTYPE(mii->mii_media_active) != IFM_NONE) {
                sc->bnx_link++;
                /* Now that link is up, handle any outstanding TX traffic. */
-               if (!IFQ_IS_EMPTY(&ifp->if_snd))
-                       bnx_start(ifp);
+               if (!ifq_empty(&ifp->if_snd))
+                       ifq_start(&ifp->if_snd);
        }
 
 bnx_tick_exit:
Index: dev/pci/if_em.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/if_em.c,v
retrieving revision 1.333
diff -u -p -r1.333 if_em.c
--- dev/pci/if_em.c     22 Jan 2017 10:17:38 -0000      1.333
+++ dev/pci/if_em.c     23 Jan 2017 06:32:59 -0000
@@ -206,7 +206,7 @@ void em_defer_attach(struct device*);
 int  em_detach(struct device *, int);
 int  em_activate(struct device *, int);
 int  em_intr(void *);
-void em_start(struct ifnet *);
+void em_start(struct ifqueue *);
 int  em_ioctl(struct ifnet *, u_long, caddr_t);
 void em_watchdog(struct ifnet *);
 void em_init(void *);
@@ -583,15 +583,16 @@ err_pci:
  **********************************************************************/
 
 void
-em_start(struct ifnet *ifp)
+em_start(struct ifqueue *ifq)
 {
+       struct ifnet *ifp = ifq->ifq_if;
        struct em_softc *sc = ifp->if_softc;
        u_int head, free, used;
        struct mbuf *m;
        int post = 0;
 
        if (!sc->link_active) {
-               IFQ_PURGE(&ifp->if_snd);
+               ifq_purge(ifq);
                return;
        }
 
@@ -611,11 +612,11 @@ em_start(struct ifnet *ifp)
        for (;;) {
                /* use 2 because cksum setup can use an extra slot */
                if (EM_MAX_SCATTER + 2 > free) {
-                       ifq_set_oactive(&ifp->if_snd);
+                       ifq_set_oactive(ifq);
                        break;
                }
 
-               m = ifq_dequeue(&ifp->if_snd);
+               m = ifq_dequeue(ifq);
                if (m == NULL)
                        break;
 
@@ -1870,7 +1871,7 @@ em_setup_interface(struct em_softc *sc)
        ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
        ifp->if_xflags = IFXF_MPSAFE;
        ifp->if_ioctl = em_ioctl;
-       ifp->if_start = em_start;
+       ifp->if_qstart = em_start;
        ifp->if_watchdog = em_watchdog;
        ifp->if_hardmtu =
                sc->hw.max_frame_size - ETHER_HDR_LEN - ETHER_CRC_LEN;
Index: dev/pci/if_myx.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/if_myx.c,v
retrieving revision 1.100
diff -u -p -r1.100 if_myx.c
--- dev/pci/if_myx.c    22 Jan 2017 10:17:38 -0000      1.100
+++ dev/pci/if_myx.c    23 Jan 2017 06:32:59 -0000
@@ -201,7 +201,7 @@ void         myx_up(struct myx_softc *);
 void    myx_iff(struct myx_softc *);
 void    myx_down(struct myx_softc *);
 
-void    myx_start(struct ifnet *);
+void    myx_start(struct ifqueue *);
 void    myx_write_txd_tail(struct myx_softc *, struct myx_slot *, u_int8_t,
            u_int32_t, u_int);
 int     myx_load_mbuf(struct myx_softc *, struct myx_slot *, struct mbuf *);
@@ -510,7 +510,7 @@ myx_attachhook(struct device *self)
        ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
        ifp->if_xflags = IFXF_MPSAFE;
        ifp->if_ioctl = myx_ioctl;
-       ifp->if_start = myx_start;
+       ifp->if_qstart = myx_start;
        ifp->if_watchdog = myx_watchdog;
        ifp->if_hardmtu = MYX_RXBIG_SIZE;
        strlcpy(ifp->if_xname, DEVNAME(sc), IFNAMSIZ);
@@ -1200,10 +1200,9 @@ myx_up(struct myx_softc *sc)
                goto empty_rx_ring_big;
        }
 
-       ifq_clr_oactive(&ifp->if_snd);
-       SET(ifp->if_flags, IFF_RUNNING);
        myx_iff(sc);
-       if_start(ifp);
+       SET(ifp->if_flags, IFF_RUNNING);
+       ifq_restart(&ifp->if_snd);
 
        return;
 
@@ -1422,8 +1421,9 @@ myx_write_txd_tail(struct myx_softc *sc,
 }
 
 void
-myx_start(struct ifnet *ifp)
+myx_start(struct ifqueue *ifq)
 {
+       struct ifnet                    *ifp = ifq->ifq_if;
        struct myx_tx_desc              txd;
        struct myx_softc                *sc = ifp->if_softc;
        struct myx_slot                 *ms;
@@ -1448,11 +1448,11 @@ myx_start(struct ifnet *ifp)
 
        for (;;) {
                if (used + sc->sc_tx_nsegs + 1 > free) {
-                       ifq_set_oactive(&ifp->if_snd);
+                       ifq_set_oactive(ifq);
                        break;
                }
 
-               IFQ_DEQUEUE(&ifp->if_snd, m);
+               m = ifq_dequeue(ifq);
                if (m == NULL)
                        break;
 
Index: dev/pci/if_ix.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/if_ix.c,v
retrieving revision 1.149
diff -u -p -r1.149 if_ix.c
--- dev/pci/if_ix.c     22 Jan 2017 10:17:38 -0000      1.149
+++ dev/pci/if_ix.c     23 Jan 2017 06:32:59 -0000
@@ -93,7 +93,7 @@ const struct pci_matchid ixgbe_devices[]
 int    ixgbe_probe(struct device *, void *, void *);
 void   ixgbe_attach(struct device *, struct device *, void *);
 int    ixgbe_detach(struct device *, int);
-void   ixgbe_start(struct ifnet *);
+void   ixgbe_start(struct ifqueue *);
 int    ixgbe_ioctl(struct ifnet *, u_long, caddr_t);
 int    ixgbe_rxrinfo(struct ix_softc *, struct if_rxrinfo *);
 void   ixgbe_watchdog(struct ifnet *);
@@ -379,14 +379,15 @@ ixgbe_detach(struct device *self, int fl
  **********************************************************************/
 
 void
-ixgbe_start(struct ifnet * ifp)
+ixgbe_start(struct ifqueue *ifq)
 {
+       struct ifnet            *ifp = ifq->ifq_if;
        struct ix_softc         *sc = ifp->if_softc;
        struct tx_ring          *txr = sc->tx_rings;
        struct mbuf             *m_head;
        int                      post = 0;
 
-       if (!(ifp->if_flags & IFF_RUNNING) || ifq_is_oactive(&ifp->if_snd))
+       if (!(ifp->if_flags & IFF_RUNNING) || ifq_is_oactive(ifq))
                return;
        if (!sc->link_up)
                return;
@@ -398,11 +399,11 @@ ixgbe_start(struct ifnet * ifp)
        for (;;) {
                /* Check that we have the minimal number of TX descriptors. */
                if (txr->tx_avail <= IXGBE_TX_OP_THRESHOLD) {
-                       ifq_set_oactive(&ifp->if_snd);
+                       ifq_set_oactive(ifq);
                        break;
                }
 
-               m_head = ifq_dequeue(&ifp->if_snd);
+               m_head = ifq_dequeue(ifq);
                if (m_head == NULL)
                        break;
 
@@ -1612,7 +1613,7 @@ ixgbe_setup_interface(struct ix_softc *s
        ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
        ifp->if_xflags = IFXF_MPSAFE;
        ifp->if_ioctl = ixgbe_ioctl;
-       ifp->if_start = ixgbe_start;
+       ifp->if_qstart = ixgbe_start;
        ifp->if_timer = 0;
        ifp->if_watchdog = ixgbe_watchdog;
        ifp->if_hardmtu = IXGBE_MAX_FRAME_SIZE -
Index: dev/pv/if_hvn.c
===================================================================
RCS file: /cvs/src/sys/dev/pv/if_hvn.c,v
retrieving revision 1.31
diff -u -p -r1.31 if_hvn.c
--- dev/pv/if_hvn.c     22 Jan 2017 10:17:39 -0000      1.31
+++ dev/pv/if_hvn.c     23 Jan 2017 06:32:59 -0000
@@ -179,7 +179,7 @@ void        hvn_media_status(struct ifnet *, st
 int    hvn_iff(struct hvn_softc *);
 void   hvn_init(struct hvn_softc *);
 void   hvn_stop(struct hvn_softc *);
-void   hvn_start(struct ifnet *);
+void   hvn_start(struct ifqueue *);
 int    hvn_encap(struct hvn_softc *, struct mbuf *, struct hvn_tx_desc **);
 void   hvn_decap(struct hvn_softc *, struct hvn_tx_desc *);
 void   hvn_txeof(struct hvn_softc *, uint64_t);
@@ -266,7 +266,7 @@ hvn_attach(struct device *parent, struct
        ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
        ifp->if_xflags = IFXF_MPSAFE;
        ifp->if_ioctl = hvn_ioctl;
-       ifp->if_start = hvn_start;
+       ifp->if_qstart = hvn_start;
        ifp->if_softc = sc;
 
        ifp->if_capabilities = IFCAP_CSUM_IPv4 | IFCAP_CSUM_TCPv4 |
@@ -320,7 +320,7 @@ hvn_attach(struct device *parent, struct
        hvn_rx_ring_destroy(sc);
        hvn_tx_ring_destroy(sc);
        hvn_nvs_detach(sc);
-       if (ifp->if_start)
+       if (ifp->if_qstart)
                if_detach(ifp);
 }
 
@@ -441,23 +441,21 @@ hvn_stop(struct hvn_softc *sc)
 }
 
 void
-hvn_start(struct ifnet *ifp)
+hvn_start(struct ifqueue *ifq)
 {
+       struct ifnet *ifp = ifq->ifq_if;
        struct hvn_softc *sc = ifp->if_softc;
        struct hvn_tx_desc *txd;
        struct mbuf *m;
 
-       if (!(ifp->if_flags & IFF_RUNNING) || ifq_is_oactive(&ifp->if_snd))
-               return;
-
        for (;;) {
                if (!sc->sc_tx_avail) {
                        /* transient */
-                       ifq_set_oactive(&ifp->if_snd);
+                       ifq_set_oactive(ifq);
                        break;
                }
 
-               m = ifq_dequeue(&ifp->if_snd);
+               m = ifq_dequeue(ifq);
                if (m == NULL)
                        break;
 
Index: dev/pv/if_xnf.c
===================================================================
RCS file: /cvs/src/sys/dev/pv/if_xnf.c,v
retrieving revision 1.47
diff -u -p -r1.47 if_xnf.c
--- dev/pv/if_xnf.c     22 Jan 2017 10:17:39 -0000      1.47
+++ dev/pv/if_xnf.c     23 Jan 2017 06:32:59 -0000
@@ -199,7 +199,7 @@ void        xnf_media_status(struct ifnet *, st
 int    xnf_iff(struct xnf_softc *);
 void   xnf_init(struct xnf_softc *);
 void   xnf_stop(struct xnf_softc *);
-void   xnf_start(struct ifnet *);
+void   xnf_start(struct ifqueue *);
 int    xnf_encap(struct xnf_softc *, struct mbuf *, uint32_t *);
 void   xnf_intr(void *);
 void   xnf_watchdog(struct ifnet *);
@@ -292,7 +292,7 @@ xnf_attach(struct device *parent, struct
        ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
        ifp->if_xflags = IFXF_MPSAFE;
        ifp->if_ioctl = xnf_ioctl;
-       ifp->if_start = xnf_start;
+       ifp->if_qstart = xnf_start;
        ifp->if_watchdog = xnf_watchdog;
        ifp->if_softc = sc;
 
@@ -477,17 +477,15 @@ xnf_stop(struct xnf_softc *sc)
 }
 
 void
-xnf_start(struct ifnet *ifp)
+xnf_start(struct ifqueue *ifq)
 {
+       struct ifnet *ifp = ifq->ifq_if;
        struct xnf_softc *sc = ifp->if_softc;
        struct xnf_tx_ring *txr = sc->sc_tx_ring;
        struct mbuf *m;
        int pkts = 0;
        uint32_t prod, oprod;
 
-       if (!(ifp->if_flags & IFF_RUNNING) || ifq_is_oactive(&ifp->if_snd))
-               return;
-
        bus_dmamap_sync(sc->sc_dmat, sc->sc_tx_rmap, 0, 0,
            BUS_DMASYNC_POSTREAD);
 
@@ -497,10 +495,11 @@ xnf_start(struct ifnet *ifp)
                if ((XNF_TX_DESC - (prod - sc->sc_tx_cons)) <
                    sc->sc_tx_frags) {
                        /* transient */
-                       ifq_set_oactive(&ifp->if_snd);
+                       ifq_set_oactive(ifq);
                        break;
                }
-               m = ifq_dequeue(&ifp->if_snd);
+
+               m = ifq_dequeue(ifq);
                if (m == NULL)
                        break;
 
Index: net/hfsc.c
===================================================================
RCS file: /cvs/src/sys/net/hfsc.c,v
retrieving revision 1.34
diff -u -p -r1.34 hfsc.c
--- net/hfsc.c  22 Jan 2017 04:48:23 -0000      1.34
+++ net/hfsc.c  23 Jan 2017 06:32:59 -0000
@@ -1,4 +1,4 @@
-/*     $OpenBSD: hfsc.c,v 1.34 2017/01/22 04:48:23 dlg Exp $   */
+/*     $OpenBSD: hfsc.c,v 1.33 2016/09/15 02:00:18 dlg Exp $   */
 
 /*
  * Copyright (c) 2012-2013 Henning Brauer <henn...@openbsd.org>
@@ -259,20 +259,22 @@ struct pool       hfsc_class_pl, hfsc_internal
  * ifqueue glue.
  */
 
-void           *hfsc_alloc(void *);
-void            hfsc_free(void *);
+unsigned int    hfsc_idx(unsigned int, const struct mbuf *);
 int             hfsc_enq(struct ifqueue *, struct mbuf *);
 struct mbuf    *hfsc_deq_begin(struct ifqueue *, void **);
 void            hfsc_deq_commit(struct ifqueue *, struct mbuf *, void *);
 void            hfsc_purge(struct ifqueue *, struct mbuf_list *);
+void           *hfsc_alloc(unsigned int, void *);
+void            hfsc_free(unsigned int, void *);
 
 const struct ifq_ops hfsc_ops = {
-       hfsc_alloc,
-       hfsc_free,
+       hfsc_idx,
        hfsc_enq,
        hfsc_deq_begin,
        hfsc_deq_commit,
        hfsc_purge,
+       hfsc_alloc,
+       hfsc_free,
 };
 
 const struct ifq_ops * const ifq_hfsc_ops = &hfsc_ops;
@@ -414,13 +416,26 @@ hfsc_pf_qstats(struct pf_queuespec *q, v
 void
 hfsc_pf_free(struct hfsc_if *hif)
 {
-       hfsc_free(hif);
+       hfsc_free(0, hif);
+}
+
+unsigned int
+hfsc_idx(unsigned int nqueues, const struct mbuf *m)
+{
+       /*
+        * hfsc can only function on a single ifq and the stack understands
+        * this. when the first ifq on an interface is switched to hfsc,
+        * this gets used to map all mbufs to the first and only ifq that
+        * is set up for hfsc.
+        */
+       return (0);
 }
 
 void *
-hfsc_alloc(void *q)
+hfsc_alloc(unsigned int idx, void *q)
 {
        struct hfsc_if *hif = q;
+       KASSERT(idx == 0); /* when hfsc is enabled we only use the first ifq */
        KASSERT(hif != NULL);
 
        timeout_add(&hif->hif_defer, 1);
@@ -428,12 +443,13 @@ hfsc_alloc(void *q)
 }
 
 void
-hfsc_free(void *q)
+hfsc_free(unsigned int idx, void *q)
 {
        struct hfsc_if *hif = q;
        int i;
 
        KERNEL_ASSERT_LOCKED();
+       KASSERT(idx == 0); /* when hfsc is enabled we only use the first ifq */
 
        timeout_del(&hif->hif_defer);
 
@@ -758,18 +774,16 @@ void
 hfsc_deferred(void *arg)
 {
        struct ifnet *ifp = arg;
+       struct ifqueue *ifq = &ifp->if_snd;
        struct hfsc_if *hif;
-       int s;
 
        KERNEL_ASSERT_LOCKED();
-       KASSERT(HFSC_ENABLED(&ifp->if_snd));
+       KASSERT(HFSC_ENABLED(ifq));
 
-       s = splnet();
-       if (!IFQ_IS_EMPTY(&ifp->if_snd))
-               if_start(ifp);
-       splx(s);
+       if (!ifq_empty(ifq))
+               (*ifp->if_qstart)(ifq);
 
-       hif = ifp->if_snd.ifq_q;
+       hif = ifq->ifq_q;
 
        /* XXX HRTIMER nearest virtual/fit time is likely less than 1/HZ. */
        timeout_add(&hif->hif_defer, 1);
Index: net/if.c
===================================================================
RCS file: /cvs/src/sys/net/if.c,v
retrieving revision 1.476
diff -u -p -r1.476 if.c
--- net/if.c    23 Jan 2017 01:26:09 -0000      1.476
+++ net/if.c    23 Jan 2017 06:32:59 -0000
@@ -1,4 +1,4 @@
-/*     $OpenBSD: if.c,v 1.476 2017/01/23 01:26:09 dlg Exp $    */
+/*     $OpenBSD: if.c,v 1.474 2017/01/12 09:07:46 mpi Exp $    */
 /*     $NetBSD: if.c,v 1.35 1996/05/07 05:26:04 thorpej Exp $  */
 
 /*
@@ -136,7 +136,7 @@ void        if_attach_common(struct ifnet *);
 int    if_setrdomain(struct ifnet *, int);
 void   if_slowtimo(void *);
 
-void   if_detached_start(struct ifnet *);
+void   if_detached_qstart(struct ifqueue *);
 int    if_detached_ioctl(struct ifnet *, u_long, caddr_t);
 
 int    if_getgroup(caddr_t, struct ifnet *);
@@ -161,7 +161,7 @@ void        if_netisr(void *);
 void   ifa_print_all(void);
 #endif
 
-void   if_start_locked(struct ifnet *);
+void   if_qstart_compat(struct ifqueue *);
 
 /*
  * interface index map
@@ -527,12 +527,53 @@ if_attach(struct ifnet *ifp)
 }
 
 void
+if_attach_queues(struct ifnet *ifp, unsigned int nqs)
+{
+       struct ifqueue **map;
+       struct ifqueue *ifq;
+       int i;
+
+       KASSERT(ifp->if_ifqs == ifp->if_snd.ifq_ifqs);
+       KASSERT(nqs != 0);
+
+       map = mallocarray(sizeof(*map), nqs, M_DEVBUF, M_WAITOK);
+
+       ifp->if_snd.ifq_softc = NULL;
+       map[0] = &ifp->if_snd;
+
+       for (i = 1; i < nqs; i++) {
+               ifq = malloc(sizeof(*ifq), M_DEVBUF, M_WAITOK|M_ZERO);
+               ifq_set_maxlen(ifq, ifp->if_snd.ifq_maxlen);
+               ifq_init(ifq, ifp, i);
+               map[i] = ifq;
+       }
+
+       ifp->if_ifqs = map;
+       ifp->if_nifqs = nqs;
+}
+
+void
 if_attach_common(struct ifnet *ifp)
 {
        TAILQ_INIT(&ifp->if_addrlist);
        TAILQ_INIT(&ifp->if_maddrlist);
 
-       ifq_init(&ifp->if_snd, ifp);
+       if (!ISSET(ifp->if_xflags, IFXF_MPSAFE)) {
+               KASSERTMSG(ifp->if_qstart == NULL,
+                   "%s: if_qstart set without MPSAFE set", ifp->if_xname);
+               ifp->if_qstart = if_qstart_compat;
+       } else {
+               KASSERTMSG(ifp->if_start == NULL,
+                   "%s: if_start set with MPSAFE set", ifp->if_xname);
+               KASSERTMSG(ifp->if_qstart != NULL,
+                   "%s: if_qstart not set with MPSAFE set", ifp->if_xname);
+       }
+
+       ifq_init(&ifp->if_snd, ifp, 0);
+
+       ifp->if_snd.ifq_ifqs[0] = &ifp->if_snd;
+       ifp->if_ifqs = ifp->if_snd.ifq_ifqs;
+       ifp->if_nifqs = 1;
 
        ifp->if_addrhooks = malloc(sizeof(*ifp->if_addrhooks),
            M_TEMP, M_WAITOK);
@@ -560,22 +601,44 @@ if_attach_common(struct ifnet *ifp)
 }
 
 void
-if_start(struct ifnet *ifp)
+if_attach_ifq(struct ifnet *ifp, const struct ifq_ops *newops, void *args)
 {
-       if (ISSET(ifp->if_xflags, IFXF_MPSAFE))
-               ifq_start(&ifp->if_snd);
-       else
-               if_start_locked(ifp);
+       /*
+        * only switch the ifq_ops on the first ifq on an interface.
+        *
+        * the only ifq_ops we provide priq and hfsc, and hfsc only
+        * works on a single ifq. because the code uses the ifq_ops
+        * on the first ifq (if_snd) to select a queue for an mbuf,
+        * by switching only the first one we change both the algorithm
+        * and force the routing of all new packets to it.
+        */
+       ifq_attach(&ifp->if_snd, newops, args);
 }
 
 void
-if_start_locked(struct ifnet *ifp)
+if_start(struct ifnet *ifp)
+{
+       KASSERT(ifp->if_qstart == if_qstart_compat);
+       if_qstart_compat(&ifp->if_snd);
+}
+void
+if_qstart_compat(struct ifqueue *ifq)
 {
+       struct ifnet *ifp = ifq->ifq_if;
        int s;
 
+       /*
+        * the stack assumes that an interface can have multiple
+        * transmit rings, but a lot of drivers are still written
+        * so that interfaces and send rings have a 1:1 mapping.
+        * this provides compatability between the stack and the older
+        * drivers by translating from the only queue they have
+        * (ifp->if_snd) back to the interface and calling if_start.
+        */
+
        KERNEL_LOCK();
        s = splnet();
-       ifp->if_start(ifp);
+       (*ifp->if_start)(ifp);
        splx(s);
        KERNEL_UNLOCK();
 }
@@ -583,7 +646,9 @@ if_start_locked(struct ifnet *ifp)
 int
 if_enqueue(struct ifnet *ifp, struct mbuf *m)
 {
-       int error = 0;
+       unsigned int idx;
+       struct ifqueue *ifq;
+       int error;
 
 #if NBRIDGE > 0
        if (ifp->if_bridgeport && (m->m_flags & M_PROTO1) == 0) {
@@ -599,14 +664,17 @@ if_enqueue(struct ifnet *ifp, struct mbu
 #endif /* NPF > 0 */
 
        /*
-        * Queue message on interface, and start output if interface
-        * not yet active.
+        * use the operations on the first ifq to pick which of the array
+        * gets this mbuf.
         */
-       IFQ_ENQUEUE(&ifp->if_snd, m, error);
+       idx = ifq_idx(&ifp->if_snd, ifp->if_nifqs, m);
+       ifq = ifp->if_ifqs[idx];
+
+       error = ifq_enqueue(ifq, m);
        if (error)
                return (error);
 
-       if_start(ifp);
+       ifq_start(ifq);
 
        return (0);
 }
@@ -942,7 +1010,7 @@ if_detach(struct ifnet *ifp)
        /* Other CPUs must not have a reference before we start destroying. */
        if_idxmap_remove(ifp);
 
-       ifp->if_start = if_detached_start;
+       ifp->if_qstart = if_detached_qstart;
        ifp->if_ioctl = if_detached_ioctl;
        ifp->if_watchdog = NULL;
 
@@ -1016,7 +1084,16 @@ if_detach(struct ifnet *ifp)
        splx(s2);
        NET_UNLOCK(s);
 
-       ifq_destroy(&ifp->if_snd);
+       for (i = 0; i < ifp->if_nifqs; i++)
+               ifq_destroy(ifp->if_ifqs[i]);
+       if (ifp->if_ifqs != ifp->if_snd.ifq_ifqs) {
+               for (i = 1; i < ifp->if_nifqs; i++) {
+                       free(ifp->if_ifqs[i], M_DEVBUF,
+                           sizeof(struct ifqueue));
+               }
+               free(ifp->if_ifqs, M_DEVBUF,
+                   sizeof(struct ifqueue *) * ifp->if_nifqs);
+       }
 }
 
 /*
@@ -2165,21 +2242,24 @@ ifconf(u_long cmd, caddr_t data)
 void
 if_data(struct ifnet *ifp, struct if_data *data)
 {
+       unsigned int i;
        struct ifqueue *ifq;
        uint64_t opackets = 0;
        uint64_t obytes = 0;
        uint64_t omcasts = 0;
        uint64_t oqdrops = 0;
 
-       ifq = &ifp->if_snd;
+       for (i = 0; i < ifp->if_nifqs; i++) {
+               ifq = ifp->if_ifqs[i];
 
-       mtx_enter(&ifq->ifq_mtx);
-       opackets += ifq->ifq_packets;
-       obytes += ifq->ifq_bytes;
-       oqdrops += ifq->ifq_qdrops;
-       omcasts += ifq->ifq_mcasts;
-       /* ifq->ifq_errors */
-       mtx_leave(&ifq->ifq_mtx);
+               mtx_enter(&ifq->ifq_mtx);
+               opackets += ifq->ifq_packets;
+               obytes += ifq->ifq_bytes;
+               oqdrops += ifq->ifq_qdrops;
+               omcasts += ifq->ifq_mcasts;
+               mtx_leave(&ifq->ifq_mtx);
+               /* ifq->ifq_errors */
+       }
 
        *data = ifp->if_data;
        data->ifi_opackets += opackets;
@@ -2194,9 +2274,9 @@ if_data(struct ifnet *ifp, struct if_dat
  * fiddle with the if during detach.
  */
 void
-if_detached_start(struct ifnet *ifp)
+if_detached_qstart(struct ifqueue *ifq)
 {
-       IFQ_PURGE(&ifp->if_snd);
+       ifq_purge(ifq);
 }
 
 int
Index: net/if.h
===================================================================
RCS file: /cvs/src/sys/net/if.h,v
retrieving revision 1.182
diff -u -p -r1.182 if.h
--- net/if.h    23 Jan 2017 01:26:09 -0000      1.182
+++ net/if.h    23 Jan 2017 06:32:59 -0000
@@ -1,4 +1,4 @@
-/*     $OpenBSD: if.h,v 1.182 2017/01/23 01:26:09 dlg Exp $    */
+/*     $OpenBSD: if.h,v 1.181 2016/12/12 09:51:30 mpi Exp $    */
 /*     $NetBSD: if.h,v 1.23 1996/05/07 02:40:27 thorpej Exp $  */
 
 /*
@@ -456,10 +456,13 @@ struct if_parent {
 #ifdef _KERNEL
 struct socket;
 struct ifnet;
+struct ifq_ops;
 
 void   if_alloc_sadl(struct ifnet *);
 void   if_free_sadl(struct ifnet *);
 void   if_attach(struct ifnet *);
+void   if_attach_queues(struct ifnet *, unsigned int);
+void   if_attach_ifq(struct ifnet *, const struct ifq_ops *, void *);
 void   if_attachtail(struct ifnet *);
 void   if_attachhead(struct ifnet *);
 void   if_deactivate(struct ifnet *);
Index: net/if_mpw.c
===================================================================
RCS file: /cvs/src/sys/net/if_mpw.c,v
retrieving revision 1.16
diff -u -p -r1.16 if_mpw.c
--- net/if_mpw.c        20 Dec 2016 12:18:44 -0000      1.16
+++ net/if_mpw.c        23 Jan 2017 06:32:59 -0000
@@ -332,7 +332,7 @@ mpw_output(struct ifnet *ifp, struct mbu
 }
 
 #if NVLAN > 0
-extern void vlan_start(struct ifnet *ifp);
+extern void vlan_start(struct ifqueue *);
 
 /*
  * This routine handles VLAN tag reinsertion in packets flowing through
@@ -349,7 +349,7 @@ mpw_vlan_handle(struct mbuf *m, struct m
        uint16_t tag = 0;
  
        ifp = if_get(m->m_pkthdr.ph_ifidx);
-       if (ifp != NULL && ifp->if_start == vlan_start &&
+       if (ifp != NULL && ifp->if_qstart == vlan_start &&
            ISSET(ifp->if_flags, IFF_RUNNING)) {
                ifv = ifp->if_softc;
                type = ifv->ifv_type;
Index: net/if_var.h
===================================================================
RCS file: /cvs/src/sys/net/if_var.h,v
retrieving revision 1.79
diff -u -p -r1.79 if_var.h
--- net/if_var.h        21 Jan 2017 01:32:19 -0000      1.79
+++ net/if_var.h        23 Jan 2017 06:32:59 -0000
@@ -157,7 +157,12 @@ struct ifnet {                             /* and the 
entries */
                                        /* timer routine */
        void    (*if_watchdog)(struct ifnet *);
        int     (*if_wol)(struct ifnet *, int);
-       struct  ifqueue if_snd;         /* output queue */
+
+       struct  ifqueue if_snd;         /* transmit queue */
+       struct  ifqueue **if_ifqs;      /* pointer to an array of sndqs */
+       void    (*if_qstart)(struct ifqueue *);
+       unsigned int if_nifqs;
+
        struct sockaddr_dl *if_sadl;    /* pointer to our sockaddr_dl */
 
        void    *if_afdata[AF_MAX];
Index: net/if_vlan.c
===================================================================
RCS file: /cvs/src/sys/net/if_vlan.c,v
retrieving revision 1.168
diff -u -p -r1.168 if_vlan.c
--- net/if_vlan.c       22 Jan 2017 10:17:39 -0000      1.168
+++ net/if_vlan.c       23 Jan 2017 06:32:59 -0000
@@ -85,7 +85,7 @@ int   vlan_clone_create(struct if_clone *,
 int    vlan_clone_destroy(struct ifnet *);
 
 int    vlan_input(struct ifnet *, struct mbuf *, void *);
-void   vlan_start(struct ifnet *ifp);
+void   vlan_start(struct ifqueue *ifq);
 int    vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr);
 
 int    vlan_up(struct ifvlan *);
@@ -175,7 +175,7 @@ vlan_clone_create(struct if_clone *ifc, 
 
        ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST;
        ifp->if_xflags = IFXF_MPSAFE;
-       ifp->if_start = vlan_start;
+       ifp->if_qstart = vlan_start;
        ifp->if_ioctl = vlan_ioctl;
        ifp->if_hardmtu = 0xffff;
        ifp->if_link_state = LINK_STATE_DOWN;
@@ -238,8 +238,9 @@ vlan_mplstunnel(int ifidx)
 }
 
 void
-vlan_start(struct ifnet *ifp)
+vlan_start(struct ifqueue *ifq)
 {
+       struct ifnet    *ifp = ifq->ifq_if;
        struct ifvlan   *ifv;
        struct ifnet    *ifp0;
        struct mbuf     *m;
@@ -249,15 +250,11 @@ vlan_start(struct ifnet *ifp)
        ifp0 = if_get(ifv->ifv_ifp0);
        if (ifp0 == NULL || (ifp0->if_flags & (IFF_UP|IFF_RUNNING)) !=
            (IFF_UP|IFF_RUNNING)) {
-               ifq_purge(&ifp->if_snd);
+               ifq_purge(ifq);
                goto leave;
        }
 
-       for (;;) {
-               IFQ_DEQUEUE(&ifp->if_snd, m);
-               if (m == NULL)
-                       break;
-
+       while ((m = ifq_dequeue(ifq)) != NULL) {
 #if NBPFILTER > 0
                if (ifp->if_bpf)
                        bpf_mtap_ether(ifp->if_bpf, m, BPF_DIRECTION_OUT);
@@ -296,6 +293,7 @@ vlan_start(struct ifnet *ifp)
 
                if (if_enqueue(ifp0, m)) {
                        ifp->if_oerrors++;
+                       ifq->ifq_errors++;
                        continue;
                }
        }
Index: net/ifq.c
===================================================================
RCS file: /cvs/src/sys/net/ifq.c,v
retrieving revision 1.5
diff -u -p -r1.5 ifq.c
--- net/ifq.c   20 Jan 2017 03:48:03 -0000      1.5
+++ net/ifq.c   23 Jan 2017 06:32:59 -0000
@@ -28,20 +28,23 @@
 /*
  * priq glue
  */
-void           *priq_alloc(void *);
-void            priq_free(void *);
+unsigned int    priq_idx(unsigned int, const struct mbuf *);
 int             priq_enq(struct ifqueue *, struct mbuf *);
 struct mbuf    *priq_deq_begin(struct ifqueue *, void **);
 void            priq_deq_commit(struct ifqueue *, struct mbuf *, void *);
 void            priq_purge(struct ifqueue *, struct mbuf_list *);
 
+void           *priq_alloc(unsigned int, void *);
+void            priq_free(unsigned int, void *);
+
 const struct ifq_ops priq_ops = {
-       priq_alloc,
-       priq_free,
+       priq_idx,
        priq_enq,
        priq_deq_begin,
        priq_deq_commit,
        priq_purge,
+       priq_alloc,
+       priq_free,
 };
 
 const struct ifq_ops * const ifq_priq_ops = &priq_ops;
@@ -119,7 +122,7 @@ ifq_start_task(void *p)
            ifq_empty(ifq) || ifq_is_oactive(ifq))
                return;
 
-       ifp->if_start(ifp);
+       ifp->if_qstart(ifq);
 }
 
 void
@@ -129,7 +132,7 @@ ifq_restart_task(void *p)
        struct ifnet *ifp = ifq->ifq_if;
 
        ifq_clr_oactive(ifq);
-       ifp->if_start(ifp);
+       ifp->if_qstart(ifq);
 }
 
 void
@@ -167,19 +170,26 @@ ifq_barrier_task(void *p)
  */
 
 void
-ifq_init(struct ifqueue *ifq, struct ifnet *ifp)
+ifq_init(struct ifqueue *ifq, struct ifnet *ifp, unsigned int idx)
 {
        ifq->ifq_if = ifp;
+       ifq->ifq_softc = NULL;
 
        mtx_init(&ifq->ifq_mtx, IPL_NET);
        ifq->ifq_qdrops = 0;
 
        /* default to priq */
        ifq->ifq_ops = &priq_ops;
-       ifq->ifq_q = priq_ops.ifqop_alloc(NULL);
+       ifq->ifq_q = priq_ops.ifqop_alloc(idx, NULL);
 
        ifq->ifq_len = 0;
 
+       ifq->ifq_packets = 0;
+       ifq->ifq_bytes = 0;
+       ifq->ifq_qdrops = 0;
+       ifq->ifq_errors = 0;
+       ifq->ifq_mcasts = 0;
+
        mtx_init(&ifq->ifq_task_mtx, IPL_NET);
        TAILQ_INIT(&ifq->ifq_task_list);
        ifq->ifq_serializer = NULL;
@@ -189,6 +199,8 @@ ifq_init(struct ifqueue *ifq, struct ifn
 
        if (ifq->ifq_maxlen == 0)
                ifq_set_maxlen(ifq, IFQ_MAXLEN);
+
+       ifq->ifq_idx = idx;
 }
 
 void
@@ -200,7 +212,7 @@ ifq_attach(struct ifqueue *ifq, const st
        const struct ifq_ops *oldops;
        void *newq, *oldq;
 
-       newq = newops->ifqop_alloc(opsarg);
+       newq = newops->ifqop_alloc(ifq->ifq_idx, opsarg);
 
        mtx_enter(&ifq->ifq_mtx);
        ifq->ifq_ops->ifqop_purge(ifq, &ml);
@@ -221,7 +233,7 @@ ifq_attach(struct ifqueue *ifq, const st
        }
        mtx_leave(&ifq->ifq_mtx);
 
-       oldops->ifqop_free(oldq);
+       oldops->ifqop_free(ifq->ifq_idx, oldq);
 
        ml_purge(&free_ml);
 }
@@ -234,7 +246,7 @@ ifq_destroy(struct ifqueue *ifq)
        /* don't need to lock because this is the last use of the ifq */
 
        ifq->ifq_ops->ifqop_purge(ifq, &ml);
-       ifq->ifq_ops->ifqop_free(ifq->ifq_q);
+       ifq->ifq_ops->ifqop_free(ifq->ifq_idx, ifq->ifq_q);
 
        ml_purge(&ml);
 }
@@ -368,14 +380,25 @@ ifq_q_leave(struct ifqueue *ifq, void *q
  * priq implementation
  */
 
+unsigned int
+priq_idx(unsigned int nqueues, const struct mbuf *m)
+{
+       unsigned int flow = 0;
+
+       if (ISSET(m->m_pkthdr.ph_flowid, M_FLOWID_VALID))
+               flow = m->m_pkthdr.ph_flowid & M_FLOWID_MASK;
+
+       return (flow % nqueues);
+}
+
 void *
-priq_alloc(void *null)
+priq_alloc(unsigned int idx, void *null)
 {
        return (malloc(sizeof(struct priq), M_DEVBUF, M_WAITOK | M_ZERO));
 }
 
 void
-priq_free(void *pq)
+priq_free(unsigned int idx, void *pq)
 {
        free(pq, M_DEVBUF, sizeof(struct priq));
 }
Index: net/ifq.h
===================================================================
RCS file: /cvs/src/sys/net/ifq.h,v
retrieving revision 1.7
diff -u -p -r1.7 ifq.h
--- net/ifq.h   22 Jan 2017 04:48:23 -0000      1.7
+++ net/ifq.h   23 Jan 2017 06:32:59 -0000
@@ -1,4 +1,4 @@
-/*     $OpenBSD: ifq.h,v 1.7 2017/01/22 04:48:23 dlg Exp $ */
+/*     $OpenBSD: ifq.h,v 1.6 2017/01/20 03:48:03 dlg Exp $ */
 
 /*
  * Copyright (c) 2015 David Gwynne <d...@openbsd.org>
@@ -25,6 +25,18 @@ struct ifq_ops;
 
 struct ifqueue {
        struct ifnet            *ifq_if;
+       union {
+               void                    *_ifq_softc;
+               /*
+                * a ring's sndq is found by looking up an array of pointers.
+                * by default we only have one sndq and the default drivers
+                * don't use ifq_softc, so we can borrow it for the map until
+                * we need to allocate a proper map.
+                */
+               struct ifqueue          *_ifq_ifqs[1];
+       } _ifq_ptr;
+#define ifq_softc               _ifq_ptr._ifq_softc
+#define ifq_ifqs                _ifq_ptr._ifq_ifqs
 
        /* mbuf handling */
        struct mutex             ifq_mtx;
@@ -49,7 +61,9 @@ struct ifqueue {
        struct task              ifq_start;
        struct task              ifq_restart;
 
+       /* properties */
        unsigned int             ifq_maxlen;
+       unsigned int             ifq_idx;
 };
 
 #ifdef _KERNEL
@@ -308,21 +322,23 @@ struct ifqueue {
  */
 
 struct ifq_ops {
-       void                    *(*ifqop_alloc)(void *);
-       void                     (*ifqop_free)(void *);
+       unsigned int             (*ifqop_idx)(unsigned int,
+                                   const struct mbuf *);
        int                      (*ifqop_enq)(struct ifqueue *, struct mbuf *);
        struct mbuf             *(*ifqop_deq_begin)(struct ifqueue *, void **);
        void                     (*ifqop_deq_commit)(struct ifqueue *,
                                    struct mbuf *, void *);
        void                     (*ifqop_purge)(struct ifqueue *,
                                    struct mbuf_list *);
+       void                    *(*ifqop_alloc)(unsigned int, void *);
+       void                     (*ifqop_free)(unsigned int, void *);
 };
 
 /*
  * Interface send queues.
  */
 
-void            ifq_init(struct ifqueue *, struct ifnet *);
+void            ifq_init(struct ifqueue *, struct ifnet *, unsigned int);
 void            ifq_attach(struct ifqueue *, const struct ifq_ops *, void *);
 void            ifq_destroy(struct ifqueue *);
 int             ifq_enqueue_try(struct ifqueue *, struct mbuf *);
@@ -370,6 +386,12 @@ static inline void
 ifq_restart(struct ifqueue *ifq)
 {
        ifq_serialize(ifq, &ifq->ifq_restart);
+}
+
+static inline unsigned int
+ifq_idx(struct ifqueue *ifq, unsigned int nifqs, const struct mbuf *m)
+{
+       return ((*ifq->ifq_ops->ifqop_idx)(nifqs, m));
 }
 
 #define IFQ_ASSERT_SERIALIZED(_ifq)    KASSERT(ifq_is_serialized(_ifq))

Reply via email to