On 26/03/20(Thu) 12:13, Martin Pieuchot wrote: > Previous diff had a bug in the interrupt handler refactoring that showed > up on i386. The version below has been tested on i386 and doesn't seem > to cause any regression. > > Here's the original explanation, diff below has msix toggle to "off": > > > Diff below allows ix(4) to establish two MSI-X handlers. Multiqueue > > support is intentionally not included in this diff. > > > > One handler is for the single queue (index ":0") and one for the > > link interrupt: > > > > # vmstat -i |grep ix0 > > irq114/ix0:0 73178178 3758 > > irq115/ix0 2 0 > > > > A per-driver toggle allows to switch MSI-X support on/off. My plan is > > to commit with the switch "off" for the moment. Switching it to "on" > > should be better done in the beginning of a release cycle. However it > > is "on" in the diff below for testing purposes. > > I appreciate more tests and oks :)
Updated diff that only unable interrupt moderation on the link interrupt if MSI-X is used. Without this change the !msix path was receiving less packets as found by Hrvoje. This has been tested on both i386 and amd64, I'm quite confident, any ok? Index: dev/pci/if_ix.c =================================================================== RCS file: /cvs/src/sys/dev/pci/if_ix.c,v retrieving revision 1.163 diff -u -p -r1.163 if_ix.c --- dev/pci/if_ix.c 25 Mar 2020 17:20:46 -0000 1.163 +++ dev/pci/if_ix.c 30 Mar 2020 12:55:04 -0000 @@ -114,6 +114,8 @@ int ixgbe_media_change(struct ifnet *); void ixgbe_identify_hardware(struct ix_softc *); int ixgbe_allocate_pci_resources(struct ix_softc *); int ixgbe_allocate_legacy(struct ix_softc *); +int ixgbe_allocate_msix(struct ix_softc *); +int ixgbe_setup_msix(struct ix_softc *); int ixgbe_allocate_queues(struct ix_softc *); void ixgbe_free_pci_resources(struct ix_softc *); void ixgbe_local_timer(void *); @@ -140,6 +142,7 @@ void ixgbe_initialize_rss_mapping(struct int ixgbe_rxfill(struct rx_ring *); void ixgbe_rxrefill(void *); +int ixgbe_intr(struct ix_softc *sc); void ixgbe_enable_intr(struct ix_softc *); void ixgbe_disable_intr(struct ix_softc *); void ixgbe_update_stats_counters(struct ix_softc *); @@ -172,11 +175,16 @@ void ixgbe_handle_msf(struct ix_softc *) void ixgbe_handle_phy(struct ix_softc *); /* Legacy (single vector interrupt handler */ -int ixgbe_intr(void *); +int ixgbe_legacy_intr(void *); void ixgbe_enable_queue(struct ix_softc *, uint32_t); +void ixgbe_enable_queues(struct ix_softc *); void ixgbe_disable_queue(struct ix_softc *, uint32_t); void ixgbe_rearm_queue(struct ix_softc *, uint32_t); +/* MSI-X (multiple vectors interrupt handlers) */ +int ixgbe_link_intr(void *); +int ixgbe_queue_intr(void *); + /********************************************************************* * OpenBSD Device Interface Entry Points *********************************************************************/ @@ -190,6 +198,7 @@ struct cfattach ix_ca = { }; int ixgbe_smart_speed = ixgbe_smart_speed_on; +int ixgbe_enable_msix = 0; /********************************************************************* * Device identification routine @@ -292,7 +301,10 @@ ixgbe_attach(struct device *parent, stru bcopy(sc->hw.mac.addr, sc->arpcom.ac_enaddr, IXGBE_ETH_LENGTH_OF_ADDRESS); - error = ixgbe_allocate_legacy(sc); + if (sc->msix > 1) + error = ixgbe_allocate_msix(sc); + else + error = ixgbe_allocate_legacy(sc); if (error) goto err_late; @@ -524,6 +536,7 @@ ixgbe_ioctl(struct ifnet * ifp, u_long c ixgbe_disable_intr(sc); ixgbe_iff(sc); ixgbe_enable_intr(sc); + ixgbe_enable_queues(sc); } error = 0; } @@ -819,6 +832,12 @@ ixgbe_init(void *arg) itr |= IXGBE_EITR_LLI_MOD | IXGBE_EITR_CNT_WDIS; IXGBE_WRITE_REG(&sc->hw, IXGBE_EITR(0), itr); + if (sc->msix > 1) { + /* Set moderation on the Link interrupt */ + IXGBE_WRITE_REG(&sc->hw, IXGBE_EITR(sc->linkvec), + IXGBE_LINK_ITR); + } + /* Enable power to the phy */ if (sc->hw.phy.ops.set_phy_power) sc->hw.phy.ops.set_phy_power(&sc->hw, TRUE); @@ -834,6 +853,7 @@ ixgbe_init(void *arg) /* And now turn on interrupts */ ixgbe_enable_intr(sc); + ixgbe_enable_queues(sc); /* Now inform the stack we're ready */ ifp->if_flags |= IFF_RUNNING; @@ -964,6 +984,16 @@ ixgbe_enable_queue(struct ix_softc *sc, } void +ixgbe_enable_queues(struct ix_softc *sc) +{ + struct ix_queue *que; + int i; + + for (i = 0, que = sc->queues; i < sc->num_queues; i++, que++) + ixgbe_enable_queue(sc, que->msix); +} + +void ixgbe_disable_queue(struct ix_softc *sc, uint32_t vector) { uint64_t queue = 1ULL << vector; @@ -982,6 +1012,41 @@ ixgbe_disable_queue(struct ix_softc *sc, } } +/* + * MSIX Interrupt Handlers + */ +int +ixgbe_link_intr(void *vsc) +{ + struct ix_softc *sc = (struct ix_softc *)vsc; + + return ixgbe_intr(sc); +} + +int +ixgbe_queue_intr(void *vque) +{ + struct ix_queue *que = vque; + struct ix_softc *sc = que->sc; + struct ifnet *ifp = &sc->arpcom.ac_if; + struct tx_ring *txr = sc->tx_rings; + + if (ISSET(ifp->if_flags, IFF_RUNNING)) { + ixgbe_rxeof(que); + ixgbe_txeof(txr); + if (ixgbe_rxfill(que->rxr)) { + /* Advance the Rx Queue "Tail Pointer" */ + IXGBE_WRITE_REG(&sc->hw, IXGBE_RDT(que->rxr->me), + que->rxr->last_desc_filled); + } else + timeout_add(&sc->rx_refill, 1); + } + + ixgbe_enable_queue(sc, que->msix); + + return (1); +} + /********************************************************************* * * Legacy Interrupt Service routine @@ -989,29 +1054,24 @@ ixgbe_disable_queue(struct ix_softc *sc, **********************************************************************/ int -ixgbe_intr(void *arg) +ixgbe_legacy_intr(void *arg) { struct ix_softc *sc = (struct ix_softc *)arg; - struct ix_queue *que = sc->queues; struct ifnet *ifp = &sc->arpcom.ac_if; struct tx_ring *txr = sc->tx_rings; - struct ixgbe_hw *hw = &sc->hw; - uint32_t reg_eicr, mod_mask, msf_mask; - int i, refill = 0; + int rv; - reg_eicr = IXGBE_READ_REG(&sc->hw, IXGBE_EICR); - if (reg_eicr == 0) { - ixgbe_enable_intr(sc); + rv = ixgbe_intr(sc); + if (rv == 0) { + ixgbe_enable_queues(sc); return (0); } if (ISSET(ifp->if_flags, IFF_RUNNING)) { + struct ix_queue *que = sc->queues; + ixgbe_rxeof(que); ixgbe_txeof(txr); - refill = 1; - } - - if (refill) { if (ixgbe_rxfill(que->rxr)) { /* Advance the Rx Queue "Tail Pointer" */ IXGBE_WRITE_REG(&sc->hw, IXGBE_RDT(que->rxr->me), @@ -1020,6 +1080,23 @@ ixgbe_intr(void *arg) timeout_add(&sc->rx_refill, 1); } + ixgbe_enable_queues(sc); + return (rv); +} + +int +ixgbe_intr(struct ix_softc *sc) +{ + struct ifnet *ifp = &sc->arpcom.ac_if; + struct ixgbe_hw *hw = &sc->hw; + uint32_t reg_eicr, mod_mask, msf_mask; + + reg_eicr = IXGBE_READ_REG(&sc->hw, IXGBE_EICR); + if (reg_eicr == 0) { + ixgbe_enable_intr(sc); + return (0); + } + /* Link status change */ if (reg_eicr & IXGBE_EICR_LSC) { KERNEL_LOCK(); @@ -1090,9 +1167,6 @@ ixgbe_intr(void *arg) KERNEL_UNLOCK(); } - for (i = 0; i < sc->num_queues; i++, que++) - ixgbe_enable_queue(sc, que->msix); - return (1); } @@ -1626,7 +1700,7 @@ ixgbe_allocate_legacy(struct ix_softc *s intrstr = pci_intr_string(pc, ih); sc->tag = pci_intr_establish(pc, ih, IPL_NET | IPL_MPSAFE, - ixgbe_intr, sc, sc->dev.dv_xname); + ixgbe_legacy_intr, sc, sc->dev.dv_xname); if (sc->tag == NULL) { printf(": couldn't establish interrupt"); if (intrstr != NULL) @@ -1642,6 +1716,92 @@ ixgbe_allocate_legacy(struct ix_softc *s return (0); } +/********************************************************************* + * + * Setup the MSI-X Interrupt handlers + * + **********************************************************************/ +int +ixgbe_allocate_msix(struct ix_softc *sc) +{ + struct ixgbe_osdep *os = &sc->osdep; + struct pci_attach_args *pa = &os->os_pa; + int vec, error = 0; + struct ix_queue *que = sc->queues; + const char *intrstr = NULL; + pci_chipset_tag_t pc = pa->pa_pc; + pci_intr_handle_t ih; + + vec = 0; + if (pci_intr_map_msix(pa, vec, &ih)) { + printf(": couldn't map interrupt\n"); + return (ENXIO); + } + + que->msix = vec; + snprintf(que->name, sizeof(que->name), "%s:%d", sc->dev.dv_xname, vec); + + intrstr = pci_intr_string(pc, ih); + que->tag = pci_intr_establish(pc, ih, IPL_NET | IPL_MPSAFE, + ixgbe_queue_intr, que, que->name); + if (que->tag == NULL) { + printf(": couldn't establish interrupt"); + if (intrstr != NULL) + printf(" at %s", intrstr); + printf("\n"); + return (ENXIO); + } + + /* Now the link status/control last MSI-X vector */ + vec++; + if (pci_intr_map_msix(pa, vec, &ih)) { + printf(": couldn't map link vector\n"); + error = ENXIO; + goto fail; + } + + intrstr = pci_intr_string(pc, ih); + sc->tag = pci_intr_establish(pc, ih, IPL_NET | IPL_MPSAFE, + ixgbe_link_intr, sc, sc->dev.dv_xname); + if (sc->tag == NULL) { + printf(": couldn't establish interrupt"); + if (intrstr != NULL) + printf(" at %s", intrstr); + printf("\n"); + error = ENXIO; + goto fail; + } + + sc->linkvec = vec; + printf(", %s, %d queue%s", intrstr, vec, (vec > 1) ? "s" : ""); + + return (0); +fail: + pci_intr_disestablish(pc, que->tag); + que->tag = NULL; + return (error); +} + +int +ixgbe_setup_msix(struct ix_softc *sc) +{ + struct ixgbe_osdep *os = &sc->osdep; + struct pci_attach_args *pa = &os->os_pa; + pci_intr_handle_t dummy; + + if (!ixgbe_enable_msix) + return (0); + + /* + * Try a dummy map, maybe this bus doesn't like MSI, this function + * has no side effects. + */ + if (pci_intr_map_msix(pa, 0, &dummy)) + return (0); + + return (2); /* queue vector + link vector */ +} + int ixgbe_allocate_pci_resources(struct ix_softc *sc) { @@ -1666,10 +1826,8 @@ ixgbe_allocate_pci_resources(struct ix_s sc->num_queues = 1; sc->hw.back = os; -#ifdef notyet /* Now setup MSI or MSI/X, return us the number of supported vectors. */ sc->msix = ixgbe_setup_msix(sc); -#endif return (0); } @@ -3085,9 +3243,7 @@ void ixgbe_enable_intr(struct ix_softc *sc) { struct ixgbe_hw *hw = &sc->hw; - struct ix_queue *que = sc->queues; uint32_t mask, fwsm; - int i; mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE); /* Enable Fan Failure detection */ @@ -3135,14 +3291,6 @@ ixgbe_enable_intr(struct ix_softc *sc) IXGBE_WRITE_REG(hw, IXGBE_EIAC, mask); } - /* - * Now enable all queues, this is done separately to - * allow for handling the extended (beyond 32) MSIX - * vectors that can be used by 82599 - */ - for (i = 0; i < sc->num_queues; i++, que++) - ixgbe_enable_queue(sc, que->msix); - IXGBE_WRITE_FLUSH(hw); } @@ -3258,15 +3406,11 @@ ixgbe_set_ivar(struct ix_softc *sc, uint void ixgbe_configure_ivars(struct ix_softc *sc) { -#if notyet struct ix_queue *que = sc->queues; uint32_t newitr; int i; - if (ixgbe_max_interrupt_rate > 0) - newitr = (4000000 / ixgbe_max_interrupt_rate) & 0x0FF8; - else - newitr = 0; + newitr = (4000000 / IXGBE_INTS_PER_SEC) & 0x0FF8; for (i = 0; i < sc->num_queues; i++, que++) { /* First the RX queue entry */ @@ -3280,7 +3424,6 @@ ixgbe_configure_ivars(struct ix_softc *s /* For the Link interrupt */ ixgbe_set_ivar(sc, 1, sc->linkvec, -1); -#endif } /* Index: dev/pci/if_ix.h =================================================================== RCS file: /cvs/src/sys/dev/pci/if_ix.h,v retrieving revision 1.39 diff -u -p -r1.39 if_ix.h --- dev/pci/if_ix.h 25 Mar 2020 17:20:46 -0000 1.39 +++ dev/pci/if_ix.h 30 Mar 2020 12:54:31 -0000 @@ -120,6 +120,7 @@ * Interrupt Moderation parameters */ #define IXGBE_INTS_PER_SEC 8000 +#define IXGBE_LINK_ITR 1000 struct ixgbe_tx_buf { uint32_t eop_index; @@ -154,6 +155,8 @@ struct ix_queue { uint32_t msix; /* This queue's MSIX vector */ uint32_t eims; /* This queue's EIMS bit */ uint32_t eitr_setting; + char name[8]; + pci_intr_handle_t ih; void *tag; struct tx_ring *txr; struct rx_ring *rxr;