This is an attempt to make the interrupt path in vmx(4) MP-safe. It seems to hold up under load here, but more testing would be appreciated.
Index: if_vmx.c =================================================================== RCS file: /cvs/src/sys/dev/pci/if_vmx.c,v retrieving revision 1.30 diff -u -p -r1.30 if_vmx.c --- if_vmx.c 24 Jun 2015 09:40:54 -0000 1.30 +++ if_vmx.c 14 Sep 2015 11:08:09 -0000 @@ -61,8 +61,9 @@ struct vmxnet3_txring { struct mbuf *m[NTXDESC]; bus_dmamap_t dmap[NTXDESC]; struct vmxnet3_txdesc *txd; - u_int head; - u_int next; + u_int prod; + u_int cons; + u_int free; u_int8_t gen; }; @@ -107,6 +108,7 @@ struct vmxnet3_softc { bus_space_handle_t sc_ioh0; bus_space_handle_t sc_ioh1; bus_dma_tag_t sc_dmat; + void *sc_ih; struct vmxnet3_txqueue sc_txq[NTXQUEUE]; struct vmxnet3_rxqueue sc_rxq[NRXQUEUE]; @@ -167,7 +169,8 @@ void vmxnet3_reset(struct vmxnet3_softc int vmxnet3_init(struct vmxnet3_softc *); int vmxnet3_ioctl(struct ifnet *, u_long, caddr_t); void vmxnet3_start(struct ifnet *); -int vmxnet3_load_mbuf(struct vmxnet3_softc *, struct mbuf *); +int vmxnet3_load_mbuf(struct vmxnet3_softc *, struct vmxnet3_txring *, + struct mbuf *); void vmxnet3_watchdog(struct ifnet *); void vmxnet3_media_status(struct ifnet *, struct ifmediareq *); int vmxnet3_media_change(struct ifnet *); @@ -239,8 +242,8 @@ vmxnet3_attach(struct device *parent, st printf(": failed to map interrupt\n"); return; } - pci_intr_establish(pa->pa_pc, ih, IPL_NET, vmxnet3_intr, sc, - self->dv_xname); + sc->sc_ih = pci_intr_establish(pa->pa_pc, ih, IPL_NET | IPL_MPSAFE, + vmxnet3_intr, sc, self->dv_xname); intrstr = pci_intr_string(pa->pa_pc, ih); if (intrstr) printf(": %s", intrstr); @@ -466,7 +469,8 @@ vmxnet3_txinit(struct vmxnet3_softc *sc, struct vmxnet3_txring *ring = &tq->cmd_ring; struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring; - ring->head = ring->next = 0; + ring->cons = ring->prod = 0; + ring->free = NTXDESC; ring->gen = 1; comp_ring->next = 0; comp_ring->gen = 1; @@ -594,16 +598,19 @@ vmxnet3_intr(void *arg) if (READ_BAR1(sc, VMXNET3_BAR1_INTR) == 0) return 0; - if (sc->sc_ds->event) + + if 
(sc->sc_ds->event) { + KERNEL_LOCK(); vmxnet3_evintr(sc); -#ifdef VMXNET3_STAT - vmxstat.intr++; -#endif + KERNEL_UNLOCK(); + } + if (ifp->if_flags & IFF_RUNNING) { vmxnet3_rxintr(sc, &sc->sc_rxq[0]); vmxnet3_txintr(sc, &sc->sc_txq[0]); vmxnet3_enable_intr(sc, 0); } + return 1; } @@ -649,7 +656,12 @@ vmxnet3_txintr(struct vmxnet3_softc *sc, struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring; struct vmxnet3_txcompdesc *txcd; struct ifnet *ifp = &sc->sc_arpcom.ac_if; - u_int sop; + bus_dmamap_t map; + struct mbuf *m; + u_int cons; + u_int free = 0; + + cons = ring->cons; for (;;) { txcd = &comp_ring->txcd[comp_ring->next]; @@ -664,21 +676,32 @@ vmxnet3_txintr(struct vmxnet3_softc *sc, comp_ring->gen ^= 1; } - sop = ring->next; - if (ring->m[sop] == NULL) - panic("%s: NULL ring->m[%u]", __func__, sop); - m_freem(ring->m[sop]); - ring->m[sop] = NULL; - bus_dmamap_unload(sc->sc_dmat, ring->dmap[sop]); - ring->next = (letoh32((txcd->txc_word0 >> + m = ring->m[cons]; + ring->m[cons] = NULL; + + KASSERT(m != NULL); + + map = ring->dmap[cons]; + free += map->dm_nsegs; + bus_dmamap_unload(sc->sc_dmat, map); + m_freem(m); + + cons = (letoh32((txcd->txc_word0 >> VMXNET3_TXC_EOPIDX_S) & VMXNET3_TXC_EOPIDX_M) + 1) % NTXDESC; - - ifp->if_flags &= ~IFF_OACTIVE; } - if (ring->head == ring->next) + + ring->cons = cons; + + if (atomic_add_int_nv(&ring->free, free) == NTXDESC) ifp->if_timer = 0; - vmxnet3_start(ifp); + + if (ISSET(ifp->if_flags, IFF_OACTIVE)) { + KERNEL_LOCK(); + CLR(ifp->if_flags, IFF_OACTIVE); + vmxnet3_start(ifp); + KERNEL_UNLOCK(); + } } void @@ -911,6 +934,8 @@ vmxnet3_stop(struct ifnet *ifp) WRITE_CMD(sc, VMXNET3_CMD_DISABLE); + intr_barrier(sc->sc_ih); + for (queue = 0; queue < NTXQUEUE; queue++) vmxnet3_txstop(sc, &sc->sc_txq[queue]); for (queue = 0; queue < NRXQUEUE; queue++) @@ -944,6 +969,11 @@ vmxnet3_init(struct vmxnet3_softc *sc) for (queue = 0; queue < NRXQUEUE; queue++) vmxnet3_rxinit(sc, &sc->sc_rxq[queue]); + for (queue = 0; queue < NRXQUEUE; 
queue++) { + WRITE_BAR0(sc, VMXNET3_BAR0_RXH1(queue), 0); + WRITE_BAR0(sc, VMXNET3_BAR0_RXH2(queue), 0); + } + WRITE_CMD(sc, VMXNET3_CMD_ENABLE); if (READ_BAR1(sc, VMXNET3_BAR1_CMD)) { printf("%s: failed to initialize\n", ifp->if_xname); @@ -951,11 +981,6 @@ vmxnet3_init(struct vmxnet3_softc *sc) return EIO; } - for (queue = 0; queue < NRXQUEUE; queue++) { - WRITE_BAR0(sc, VMXNET3_BAR0_RXH1(queue), 0); - WRITE_BAR0(sc, VMXNET3_BAR0_RXH2(queue), 0); - } - /* Program promiscuous mode and multicast filters. */ vmxnet3_iff(sc); @@ -1024,59 +1049,57 @@ void vmxnet3_start(struct ifnet *ifp) { struct vmxnet3_softc *sc = ifp->if_softc; - struct vmxnet3_txqueue *tq = &sc->sc_txq[0]; + struct vmxnet3_txqueue *tq = sc->sc_txq; struct vmxnet3_txring *ring = &tq->cmd_ring; struct mbuf *m; - int n = 0; + u_int free, used; + int n; if ((ifp->if_flags & (IFF_RUNNING | IFF_OACTIVE)) != IFF_RUNNING) return; + free = ring->free; + used = 0; + for (;;) { - IFQ_POLL(&ifp->if_snd, m); - if (m == NULL) - break; - if ((ring->next - ring->head - 1) % NTXDESC < NTXSEGS) { + if (used + NTXSEGS > free) { ifp->if_flags |= IFF_OACTIVE; break; } IFQ_DEQUEUE(&ifp->if_snd, m); - if (vmxnet3_load_mbuf(sc, m) != 0) { + if (m == NULL) + break; + + n = vmxnet3_load_mbuf(sc, ring, m); + if (n == -1) { ifp->if_oerrors++; continue; } -#if NBPFILTER > 0 - if (ifp->if_bpf) - bpf_mtap_ether(ifp->if_bpf, m, BPF_DIRECTION_OUT); -#endif - ifp->if_timer = 5; + ifp->if_opackets++; - n++; + used += n; } - if (n > 0) - WRITE_BAR0(sc, VMXNET3_BAR0_TXH(0), ring->head); -#ifdef VMXNET3_STAT - vmxstat.txhead = ring->head; - vmxstat.txdone = ring->next; - vmxstat.maxtxlen = - max(vmxstat.maxtxlen, (ring->head - ring->next) % NTXDESC); -#endif + if (used > 0) { + ifp->if_timer = 5; + atomic_sub_int(&ring->free, used); + WRITE_BAR0(sc, VMXNET3_BAR0_TXH(0), ring->prod); + } } int -vmxnet3_load_mbuf(struct vmxnet3_softc *sc, struct mbuf *m) +vmxnet3_load_mbuf(struct vmxnet3_softc *sc, struct vmxnet3_txring *ring, + struct 
mbuf *m) { - struct vmxnet3_txqueue *tq = &sc->sc_txq[0]; - struct vmxnet3_txring *ring = &tq->cmd_ring; struct vmxnet3_txdesc *txd, *sop; - struct mbuf *mp; - struct ip *ip; - bus_dmamap_t map = ring->dmap[ring->head]; + bus_dmamap_t map; u_int hlen = ETHER_HDR_LEN, csum_off; - int offp, gen, i; + u_int prod; + int gen, i; + prod = ring->prod; + map = ring->dmap[prod]; #if 0 if (m->m_pkthdr.csum_flags & M_IPV4_CSUM_OUT) { printf("%s: IP checksum offloading is not supported\n", @@ -1085,6 +1108,10 @@ vmxnet3_load_mbuf(struct vmxnet3_softc * } #endif if (m->m_pkthdr.csum_flags & (M_TCP_CSUM_OUT|M_UDP_CSUM_OUT)) { + struct mbuf *mp; + struct ip *ip; + int offp; + if (m->m_pkthdr.csum_flags & M_TCP_CSUM_OUT) csum_off = offsetof(struct tcphdr, th_sum); else @@ -1117,21 +1144,24 @@ vmxnet3_load_mbuf(struct vmxnet3_softc * return -1; } - ring->m[ring->head] = m; - sop = &ring->txd[ring->head]; + ring->m[prod] = m; + + sop = &ring->txd[prod]; gen = ring->gen ^ 1; /* owned by cpu (yet) */ + for (i = 0; i < map->dm_nsegs; i++) { - txd = &ring->txd[ring->head]; + txd = &ring->txd[prod]; txd->tx_addr = htole64(map->dm_segs[i].ds_addr); txd->tx_word2 = htole32(((map->dm_segs[i].ds_len & VMXNET3_TX_LEN_M) << VMXNET3_TX_LEN_S) | ((gen & VMXNET3_TX_GEN_M) << VMXNET3_TX_GEN_S)); txd->tx_word3 = 0; - ring->head++; - if (ring->head == NTXDESC) { - ring->head = 0; + + if (++prod == NTXDESC) { + prod = 0; ring->gen ^= 1; } + gen = ring->gen; } txd->tx_word3 |= htole32(VMXNET3_TX_EOP | VMXNET3_TX_COMPREQ); @@ -1148,10 +1178,14 @@ vmxnet3_load_mbuf(struct vmxnet3_softc * VMXNET3_TX_HLEN_S) | (VMXNET3_OM_CSUM << VMXNET3_TX_OM_S)); } + /* dmamap_sync map */ + + ring->prod = prod; + /* Change the ownership by flipping the "generation" bit */ sop->tx_word2 ^= htole32(VMXNET3_TX_GEN_M << VMXNET3_TX_GEN_S); - return (0); + return (map->dm_nsegs); } void