This one in the tree, so it’s live on my side.
> On 14 sep. 2015, at 13:09, David Gwynne <da...@gwynne.id.au> wrote:
>
> this is an attempt to make the interrupt path in vmx mpsafe.
>
> seems to hold up under load here, but more testing would be
> appreciated.
>
> Index: if_vmx.c
> ===================================================================
> RCS file: /cvs/src/sys/dev/pci/if_vmx.c,v
> retrieving revision 1.30
> diff -u -p -r1.30 if_vmx.c
> --- if_vmx.c 24 Jun 2015 09:40:54 -0000 1.30
> +++ if_vmx.c 14 Sep 2015 11:08:09 -0000
> @@ -61,8 +61,9 @@ struct vmxnet3_txring {
> struct mbuf *m[NTXDESC];
> bus_dmamap_t dmap[NTXDESC];
> struct vmxnet3_txdesc *txd;
> - u_int head;
> - u_int next;
> + u_int prod;
> + u_int cons;
> + u_int free;
> u_int8_t gen;
> };
>
> @@ -107,6 +108,7 @@ struct vmxnet3_softc {
> bus_space_handle_t sc_ioh0;
> bus_space_handle_t sc_ioh1;
> bus_dma_tag_t sc_dmat;
> + void *sc_ih;
>
> struct vmxnet3_txqueue sc_txq[NTXQUEUE];
> struct vmxnet3_rxqueue sc_rxq[NRXQUEUE];
> @@ -167,7 +169,8 @@ void vmxnet3_reset(struct vmxnet3_softc
> int vmxnet3_init(struct vmxnet3_softc *);
> int vmxnet3_ioctl(struct ifnet *, u_long, caddr_t);
> void vmxnet3_start(struct ifnet *);
> -int vmxnet3_load_mbuf(struct vmxnet3_softc *, struct mbuf *);
> +int vmxnet3_load_mbuf(struct vmxnet3_softc *, struct vmxnet3_txring *,
> + struct mbuf *);
> void vmxnet3_watchdog(struct ifnet *);
> void vmxnet3_media_status(struct ifnet *, struct ifmediareq *);
> int vmxnet3_media_change(struct ifnet *);
> @@ -239,8 +242,8 @@ vmxnet3_attach(struct device *parent, st
> printf(": failed to map interrupt\n");
> return;
> }
> - pci_intr_establish(pa->pa_pc, ih, IPL_NET, vmxnet3_intr, sc,
> - self->dv_xname);
> + sc->sc_ih = pci_intr_establish(pa->pa_pc, ih, IPL_NET | IPL_MPSAFE,
> + vmxnet3_intr, sc, self->dv_xname);
> intrstr = pci_intr_string(pa->pa_pc, ih);
> if (intrstr)
> printf(": %s", intrstr);
> @@ -466,7 +469,8 @@ vmxnet3_txinit(struct vmxnet3_softc *sc,
> struct vmxnet3_txring *ring = &tq->cmd_ring;
> struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
>
> - ring->head = ring->next = 0;
> + ring->cons = ring->prod = 0;
> + ring->free = NTXDESC;
> ring->gen = 1;
> comp_ring->next = 0;
> comp_ring->gen = 1;
> @@ -594,16 +598,19 @@ vmxnet3_intr(void *arg)
>
> if (READ_BAR1(sc, VMXNET3_BAR1_INTR) == 0)
> return 0;
> - if (sc->sc_ds->event)
> +
> + if (sc->sc_ds->event) {
> + KERNEL_LOCK();
> vmxnet3_evintr(sc);
> -#ifdef VMXNET3_STAT
> - vmxstat.intr++;
> -#endif
> + KERNEL_UNLOCK();
> + }
> +
> if (ifp->if_flags & IFF_RUNNING) {
> vmxnet3_rxintr(sc, &sc->sc_rxq[0]);
> vmxnet3_txintr(sc, &sc->sc_txq[0]);
> vmxnet3_enable_intr(sc, 0);
> }
> +
> return 1;
> }
>
> @@ -649,7 +656,12 @@ vmxnet3_txintr(struct vmxnet3_softc *sc,
> struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
> struct vmxnet3_txcompdesc *txcd;
> struct ifnet *ifp = &sc->sc_arpcom.ac_if;
> - u_int sop;
> + bus_dmamap_t map;
> + struct mbuf *m;
> + u_int cons;
> + u_int free = 0;
> +
> + cons = ring->cons;
>
> for (;;) {
> txcd = &comp_ring->txcd[comp_ring->next];
> @@ -664,21 +676,32 @@ vmxnet3_txintr(struct vmxnet3_softc *sc,
> comp_ring->gen ^= 1;
> }
>
> - sop = ring->next;
> - if (ring->m[sop] == NULL)
> - panic("%s: NULL ring->m[%u]", __func__, sop);
> - m_freem(ring->m[sop]);
> - ring->m[sop] = NULL;
> - bus_dmamap_unload(sc->sc_dmat, ring->dmap[sop]);
> - ring->next = (letoh32((txcd->txc_word0 >>
> + m = ring->m[cons];
> + ring->m[cons] = NULL;
> +
> + KASSERT(m != NULL);
> +
> + map = ring->dmap[cons];
> + free += map->dm_nsegs;
> + bus_dmamap_unload(sc->sc_dmat, map);
> + m_freem(m);
> +
> + cons = (letoh32((txcd->txc_word0 >>
> VMXNET3_TXC_EOPIDX_S) & VMXNET3_TXC_EOPIDX_M) + 1)
> % NTXDESC;
> -
> - ifp->if_flags &= ~IFF_OACTIVE;
> }
> - if (ring->head == ring->next)
> +
> + ring->cons = cons;
> +
> + if (atomic_add_int_nv(&ring->free, free) == NTXDESC)
> ifp->if_timer = 0;
> - vmxnet3_start(ifp);
> +
> + if (ISSET(ifp->if_flags, IFF_OACTIVE)) {
> + KERNEL_LOCK();
> + CLR(ifp->if_flags, IFF_OACTIVE);
> + vmxnet3_start(ifp);
> + KERNEL_UNLOCK();
> + }
> }
>
> void
> @@ -911,6 +934,8 @@ vmxnet3_stop(struct ifnet *ifp)
>
> WRITE_CMD(sc, VMXNET3_CMD_DISABLE);
>
> + intr_barrier(sc->sc_ih);
> +
> for (queue = 0; queue < NTXQUEUE; queue++)
> vmxnet3_txstop(sc, &sc->sc_txq[queue]);
> for (queue = 0; queue < NRXQUEUE; queue++)
> @@ -944,6 +969,11 @@ vmxnet3_init(struct vmxnet3_softc *sc)
> for (queue = 0; queue < NRXQUEUE; queue++)
> vmxnet3_rxinit(sc, &sc->sc_rxq[queue]);
>
> + for (queue = 0; queue < NRXQUEUE; queue++) {
> + WRITE_BAR0(sc, VMXNET3_BAR0_RXH1(queue), 0);
> + WRITE_BAR0(sc, VMXNET3_BAR0_RXH2(queue), 0);
> + }
> +
> WRITE_CMD(sc, VMXNET3_CMD_ENABLE);
> if (READ_BAR1(sc, VMXNET3_BAR1_CMD)) {
> printf("%s: failed to initialize\n", ifp->if_xname);
> @@ -951,11 +981,6 @@ vmxnet3_init(struct vmxnet3_softc *sc)
> return EIO;
> }
>
> - for (queue = 0; queue < NRXQUEUE; queue++) {
> - WRITE_BAR0(sc, VMXNET3_BAR0_RXH1(queue), 0);
> - WRITE_BAR0(sc, VMXNET3_BAR0_RXH2(queue), 0);
> - }
> -
> /* Program promiscuous mode and multicast filters. */
> vmxnet3_iff(sc);
>
> @@ -1024,59 +1049,57 @@ void
> vmxnet3_start(struct ifnet *ifp)
> {
> struct vmxnet3_softc *sc = ifp->if_softc;
> - struct vmxnet3_txqueue *tq = &sc->sc_txq[0];
> + struct vmxnet3_txqueue *tq = sc->sc_txq;
> struct vmxnet3_txring *ring = &tq->cmd_ring;
> struct mbuf *m;
> - int n = 0;
> + u_int free, used;
> + int n;
>
> if ((ifp->if_flags & (IFF_RUNNING | IFF_OACTIVE)) != IFF_RUNNING)
> return;
>
> + free = ring->free;
> + used = 0;
> +
> for (;;) {
> - IFQ_POLL(&ifp->if_snd, m);
> - if (m == NULL)
> - break;
> - if ((ring->next - ring->head - 1) % NTXDESC < NTXSEGS) {
> + if (used + NTXSEGS > free) {
> ifp->if_flags |= IFF_OACTIVE;
> break;
> }
>
> IFQ_DEQUEUE(&ifp->if_snd, m);
> - if (vmxnet3_load_mbuf(sc, m) != 0) {
> + if (m == NULL)
> + break;
> +
> + n = vmxnet3_load_mbuf(sc, ring, m);
> + if (n == -1) {
> ifp->if_oerrors++;
> continue;
> }
> -#if NBPFILTER > 0
> - if (ifp->if_bpf)
> - bpf_mtap_ether(ifp->if_bpf, m, BPF_DIRECTION_OUT);
> -#endif
> - ifp->if_timer = 5;
> +
> ifp->if_opackets++;
> - n++;
> + used += n;
> }
>
> - if (n > 0)
> - WRITE_BAR0(sc, VMXNET3_BAR0_TXH(0), ring->head);
> -#ifdef VMXNET3_STAT
> - vmxstat.txhead = ring->head;
> - vmxstat.txdone = ring->next;
> - vmxstat.maxtxlen =
> - max(vmxstat.maxtxlen, (ring->head - ring->next) % NTXDESC);
> -#endif
> + if (used > 0) {
> + ifp->if_timer = 5;
> + atomic_sub_int(&ring->free, used);
> + WRITE_BAR0(sc, VMXNET3_BAR0_TXH(0), ring->prod);
> + }
> }
>
> int
> -vmxnet3_load_mbuf(struct vmxnet3_softc *sc, struct mbuf *m)
> +vmxnet3_load_mbuf(struct vmxnet3_softc *sc, struct vmxnet3_txring *ring,
> + struct mbuf *m)
> {
> - struct vmxnet3_txqueue *tq = &sc->sc_txq[0];
> - struct vmxnet3_txring *ring = &tq->cmd_ring;
> struct vmxnet3_txdesc *txd, *sop;
> - struct mbuf *mp;
> - struct ip *ip;
> - bus_dmamap_t map = ring->dmap[ring->head];
> + bus_dmamap_t map;
> u_int hlen = ETHER_HDR_LEN, csum_off;
> - int offp, gen, i;
> + u_int prod;
> + int gen, i;
>
> + prod = ring->prod;
> + map = ring->dmap[prod];
> #if 0
> if (m->m_pkthdr.csum_flags & M_IPV4_CSUM_OUT) {
> printf("%s: IP checksum offloading is not supported\n",
> @@ -1085,6 +1108,10 @@ vmxnet3_load_mbuf(struct vmxnet3_softc *
> }
> #endif
> if (m->m_pkthdr.csum_flags & (M_TCP_CSUM_OUT|M_UDP_CSUM_OUT)) {
> + struct mbuf *mp;
> + struct ip *ip;
> + int offp;
> +
> if (m->m_pkthdr.csum_flags & M_TCP_CSUM_OUT)
> csum_off = offsetof(struct tcphdr, th_sum);
> else
> @@ -1117,21 +1144,24 @@ vmxnet3_load_mbuf(struct vmxnet3_softc *
> return -1;
> }
>
> - ring->m[ring->head] = m;
> - sop = &ring->txd[ring->head];
> + ring->m[prod] = m;
> +
> + sop = &ring->txd[prod];
> gen = ring->gen ^ 1; /* owned by cpu (yet) */
> +
> for (i = 0; i < map->dm_nsegs; i++) {
> - txd = &ring->txd[ring->head];
> + txd = &ring->txd[prod];
> txd->tx_addr = htole64(map->dm_segs[i].ds_addr);
> txd->tx_word2 = htole32(((map->dm_segs[i].ds_len &
> VMXNET3_TX_LEN_M) << VMXNET3_TX_LEN_S) |
> ((gen & VMXNET3_TX_GEN_M) << VMXNET3_TX_GEN_S));
> txd->tx_word3 = 0;
> - ring->head++;
> - if (ring->head == NTXDESC) {
> - ring->head = 0;
> +
> + if (++prod == NTXDESC) {
> + prod = 0;
> ring->gen ^= 1;
> }
> +
> gen = ring->gen;
> }
> txd->tx_word3 |= htole32(VMXNET3_TX_EOP | VMXNET3_TX_COMPREQ);
> @@ -1148,10 +1178,14 @@ vmxnet3_load_mbuf(struct vmxnet3_softc *
> VMXNET3_TX_HLEN_S) | (VMXNET3_OM_CSUM << VMXNET3_TX_OM_S));
> }
>
> + /* dmamap_sync map */
> +
> + ring->prod = prod;
> +
> /* Change the ownership by flipping the "generation" bit */
> sop->tx_word2 ^= htole32(VMXNET3_TX_GEN_M << VMXNET3_TX_GEN_S);
>
> - return (0);
> + return (map->dm_nsegs);
> }
>
> void
>