Module Name: src
Committed By: msaitoh
Date: Tue May 8 09:45:54 UTC 2018
Modified Files:
src/sys/dev/pci/ixgbe: ix_txrx.c ixgbe.c ixgbe.h ixv.c
Log Message:
- Fix broken watchdog timer. This change detects TX device timeout correctly.
NOTE: It's supporsed not to be called {ixgbe,ixv}_rearm_queues() in the
timer. Those are not required if any chip have no bug. In reality,
ixgbe_rearm_queues() is required on 82599 and newer chip AND other than
queue 0 to prevent device timeout. When it occured, packet was sent but the
descriptor's DD bit wasn't set even though IXGBE_TXD_CMD_EOP and
IXGBE_TXD_CMD_RS were set. After forcing interrupt by writing EICS register
in ixgbe_rearm_queues(), DD is set. Why? Is this an undocumented errata? It
might be possible not call rearm_queues on 82598 or queue 0, we call in any
cases in case the problem occurs. On ixv(4), I have not seen this problem yet
(though I tested only on X550_X(Xeon D 12xx)'s virtual function), but we
do rearm in case TX device timeout happen.
- ixv(4): Call callout_stop() earlier in ixv_stop() like ixgbe_stop().
- KNF.
To generate a diff of this commit:
cvs rdiff -u -r1.41 -r1.42 src/sys/dev/pci/ixgbe/ix_txrx.c
cvs rdiff -u -r1.149 -r1.150 src/sys/dev/pci/ixgbe/ixgbe.c
cvs rdiff -u -r1.46 -r1.47 src/sys/dev/pci/ixgbe/ixgbe.h
cvs rdiff -u -r1.95 -r1.96 src/sys/dev/pci/ixgbe/ixv.c
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/sys/dev/pci/ixgbe/ix_txrx.c
diff -u src/sys/dev/pci/ixgbe/ix_txrx.c:1.41 src/sys/dev/pci/ixgbe/ix_txrx.c:1.42
--- src/sys/dev/pci/ixgbe/ix_txrx.c:1.41 Wed Apr 25 08:46:19 2018
+++ src/sys/dev/pci/ixgbe/ix_txrx.c Tue May 8 09:45:54 2018
@@ -1,4 +1,4 @@
-/* $NetBSD: ix_txrx.c,v 1.41 2018/04/25 08:46:19 msaitoh Exp $ */
+/* $NetBSD: ix_txrx.c,v 1.42 2018/05/08 09:45:54 msaitoh Exp $ */
/******************************************************************************
@@ -130,9 +130,10 @@ static void ixgbe_setup_hw_rsc(struct rx
int
ixgbe_legacy_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
- int rc;
struct mbuf *m_head;
struct adapter *adapter = txr->adapter;
+ int enqueued = 0;
+ int rc;
IXGBE_TX_LOCK_ASSERT(txr);
@@ -158,6 +159,7 @@ ixgbe_legacy_start_locked(struct ifnet *
if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
break;
}
+ enqueued++;
IFQ_DEQUEUE(&ifp->if_snd, m_head);
if (rc != 0) {
m_freem(m_head);
@@ -167,6 +169,10 @@ ixgbe_legacy_start_locked(struct ifnet *
/* Send a copy of the frame to the BPF listener */
bpf_mtap(ifp, m_head);
}
+ if (enqueued) {
+ txr->lastsent = time_uptime;
+ txr->sending = true;
+ }
return IXGBE_SUCCESS;
} /* ixgbe_legacy_start_locked */
@@ -313,6 +319,11 @@ ixgbe_mq_start_locked(struct ifnet *ifp,
break;
}
+ if (enqueued) {
+ txr->lastsent = time_uptime;
+ txr->sending = true;
+ }
+
if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD(txr->adapter))
ixgbe_txeof(txr);
@@ -537,10 +548,6 @@ retry:
if (m_head->m_flags & M_MCAST)
ifp->if_omcasts++;
- /* Mark queue as having work */
- if (txr->busy == 0)
- txr->busy = 1;
-
return (0);
} /* ixgbe_xmit */
@@ -666,6 +673,7 @@ ixgbe_setup_transmit_ring(struct tx_ring
/* Free any existing tx buffers. */
txbuf = txr->tx_buffers;
for (int i = 0; i < txr->num_desc; i++, txbuf++) {
+ txr->sending = false;
if (txbuf->m_head != NULL) {
bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
0, txbuf->m_head->m_pkthdr.len,
@@ -1126,7 +1134,7 @@ ixgbe_txeof(struct tx_ring *txr)
#endif /* DEV_NETMAP */
if (txr->tx_avail == txr->num_desc) {
- txr->busy = 0;
+ txr->sending = false;
return false;
}
@@ -1208,26 +1216,6 @@ ixgbe_txeof(struct tx_ring *txr)
work += txr->num_desc;
txr->next_to_clean = work;
- /*
- * Queue Hang detection, we know there's
- * work outstanding or the first return
- * would have been taken, so increment busy
- * if nothing managed to get cleaned, then
- * in local_timer it will be checked and
- * marked as HUNG if it exceeds a MAX attempt.
- */
- if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
- ++txr->busy;
- /*
- * If anything gets cleaned we reset state to 1,
- * note this will turn off HUNG if its set.
- */
- if (processed)
- txr->busy = 1;
-
- if (txr->tx_avail == txr->num_desc)
- txr->busy = 0;
-
return ((limit > 0) ? false : true);
} /* ixgbe_txeof */
Index: src/sys/dev/pci/ixgbe/ixgbe.c
diff -u src/sys/dev/pci/ixgbe/ixgbe.c:1.149 src/sys/dev/pci/ixgbe/ixgbe.c:1.150
--- src/sys/dev/pci/ixgbe/ixgbe.c:1.149 Thu Apr 19 07:40:12 2018
+++ src/sys/dev/pci/ixgbe/ixgbe.c Tue May 8 09:45:54 2018
@@ -1,4 +1,4 @@
-/* $NetBSD: ixgbe.c,v 1.149 2018/04/19 07:40:12 msaitoh Exp $ */
+/* $NetBSD: ixgbe.c,v 1.150 2018/05/08 09:45:54 msaitoh Exp $ */
/******************************************************************************
@@ -184,6 +184,8 @@ static void ixgbe_free_pciintr_resources
static void ixgbe_free_pci_resources(struct adapter *);
static void ixgbe_local_timer(void *);
static void ixgbe_local_timer1(void *);
+static void ixgbe_watchdog(struct ifnet *);
+static bool ixgbe_watchdog_txq(struct ifnet *, struct tx_ring *, bool *);
static int ixgbe_setup_interface(device_t, struct adapter *);
static void ixgbe_config_gpie(struct adapter *);
static void ixgbe_config_dmac(struct adapter *);
@@ -4257,11 +4259,8 @@ static void
ixgbe_local_timer1(void *arg)
{
struct adapter *adapter = arg;
- device_t dev = adapter->dev;
struct ix_queue *que = adapter->queues;
- u64 queues = 0;
u64 v0, v1, v2, v3, v4, v5, v6, v7;
- int hung = 0;
int i;
KASSERT(mutex_owned(&adapter->core_mtx));
@@ -4298,63 +4297,93 @@ ixgbe_local_timer1(void *arg)
adapter->enomem_tx_dma_setup.ev_count = v6;
adapter->tso_err.ev_count = v7;
- /*
- * Check the TX queues status
- * - mark hung queues so we don't schedule on them
- * - watchdog only if all queues show hung
- */
- que = adapter->queues;
- for (i = 0; i < adapter->num_queues; i++, que++) {
- /* Keep track of queues with work for soft irq */
- if (que->txr->busy)
- queues |= ((u64)1 << que->me);
- /*
- * Each time txeof runs without cleaning, but there
- * are uncleaned descriptors it increments busy. If
- * we get to the MAX we declare it hung.
- */
- if (que->busy == IXGBE_QUEUE_HUNG) {
- ++hung;
- /* Mark the queue as inactive */
- adapter->active_queues &= ~((u64)1 << que->me);
- continue;
- } else {
- /* Check if we've come back from hung */
- if ((adapter->active_queues & ((u64)1 << que->me)) == 0)
- adapter->active_queues |= ((u64)1 << que->me);
- }
- if (que->busy >= IXGBE_MAX_TX_BUSY) {
- device_printf(dev,
- "Warning queue %d appears to be hung!\n", i);
- que->txr->busy = IXGBE_QUEUE_HUNG;
- ++hung;
- }
+ ixgbe_watchdog(adapter->ifp);
+
+out:
+ callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);
+} /* ixgbe_local_timer */
+
+static void
+ixgbe_watchdog(struct ifnet *ifp)
+{
+ struct adapter *adapter = ifp->if_softc;
+ struct ix_queue *que;
+ struct tx_ring *txr;
+ u64 queues = 0;
+ bool hung = false;
+ bool sending = false;
+ int i;
+
+ txr = adapter->tx_rings;
+ for (i = 0; i < adapter->num_queues; i++, txr++) {
+ hung = ixgbe_watchdog_txq(ifp, txr, &sending);
+ if (hung)
+ break;
+ else if (sending)
+ queues |= ((u64)1 << txr->me);
}
- /* Only truely watchdog if all queues show hung */
- if (hung == adapter->num_queues)
- goto watchdog;
- else if (queues != 0) { /* Force an IRQ on queues with work */
+ if (hung) {
+ ifp->if_flags &= ~IFF_RUNNING;
+ ifp->if_oerrors++;
+ adapter->watchdog_events.ev_count++;
+ ixgbe_init_locked(adapter);
+ } else if (queues != 0) {
+ /*
+ * Force an IRQ on queues with work.
+ *
+ * It's supporsed not to be called ixgbe_rearm_queues() if
+ * any chips have no bug. In reality, ixgbe_rearm_queues() is
+ * required on 82599 and newer chip AND other than queue 0 to
+ * prevent device timeout. When it occured, packet was sent but
+ * the descriptor's DD bot wasn't set even though
+ * IXGBE_TXD_CMD_EOP and IXGBE_TXD_CMD_RS were set. After
+ * forcing interrupt by writing EICS register in
+ * ixgbe_rearm_queues(), DD is set. Why? Is this an
+ * undocumented errata? It might be possible not call
+ * rearm_queues on 82598 or queue 0, we call in any cases in
+ * case the problem occurs.
+ */
que = adapter->queues;
for (i = 0; i < adapter->num_queues; i++, que++) {
+ u64 index = queues & ((u64)1 << i);
+
mutex_enter(&que->dc_mtx);
- if (que->disabled_count == 0)
- ixgbe_rearm_queues(adapter,
- queues & ((u64)1 << i));
+ if ((index != 0) && (que->disabled_count == 0))
+ ixgbe_rearm_queues(adapter, index);
mutex_exit(&que->dc_mtx);
}
}
+}
-out:
- callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);
- return;
+static bool
+ixgbe_watchdog_txq(struct ifnet *ifp, struct tx_ring *txr, bool *sending)
+{
+ struct adapter *adapter = ifp->if_softc;
+ device_t dev = adapter->dev;
+ bool hung = false;
+ bool more = false;
-watchdog:
- device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
- adapter->ifp->if_flags &= ~IFF_RUNNING;
- adapter->watchdog_events.ev_count++;
- ixgbe_init_locked(adapter);
-} /* ixgbe_local_timer */
+ IXGBE_TX_LOCK(txr);
+ *sending = txr->sending;
+ if (*sending && ((time_uptime - txr->lastsent) > IXGBE_TX_TIMEOUT)) {
+ /*
+ * Since we're using delayed interrupts, sweep up before we
+ * report an error.
+ */
+ do {
+ more = ixgbe_txeof(txr);
+ } while (more);
+ hung = true;
+ device_printf(dev,
+ "Watchdog timeout (queue %d%s)-- resetting\n", txr->me,
+ (txr->tx_avail == txr->num_desc)
+ ? ", lost interrupt?" : "");
+ }
+ IXGBE_TX_UNLOCK(txr);
+
+ return hung;
+}
/************************************************************************
* ixgbe_sfp_probe
@@ -4514,6 +4543,8 @@ ixgbe_stop(void *arg)
struct ifnet *ifp;
struct adapter *adapter = arg;
struct ixgbe_hw *hw = &adapter->hw;
+ struct tx_ring *txr;
+ int i;
ifp = adapter->ifp;
@@ -4523,6 +4554,13 @@ ixgbe_stop(void *arg)
ixgbe_disable_intr(adapter);
callout_stop(&adapter->timer);
+ txr = adapter->tx_rings;
+ for (i = 0; i < adapter->num_queues; i++, txr++) {
+ IXGBE_TX_LOCK(txr);
+ txr->sending = false;
+ IXGBE_TX_UNLOCK(txr);
+ }
+
/* Let the stack know...*/
ifp->if_flags &= ~IFF_RUNNING;
@@ -6116,8 +6154,8 @@ static int
ixgbe_allocate_msix(struct adapter *adapter, const struct pci_attach_args *pa)
{
device_t dev = adapter->dev;
- struct ix_queue *que = adapter->queues;
- struct tx_ring *txr = adapter->tx_rings;
+ struct ix_queue *que = adapter->queues;
+ struct tx_ring *txr = adapter->tx_rings;
pci_chipset_tag_t pc;
char intrbuf[PCI_INTRSTR_LEN];
char intr_xname[32];
@@ -6457,7 +6495,7 @@ ixgbe_handle_link(void *context)
/************************************************************************
* ixgbe_rearm_queues
************************************************************************/
-static void
+static inline void
ixgbe_rearm_queues(struct adapter *adapter, u64 queues)
{
u32 mask;
Index: src/sys/dev/pci/ixgbe/ixgbe.h
diff -u src/sys/dev/pci/ixgbe/ixgbe.h:1.46 src/sys/dev/pci/ixgbe/ixgbe.h:1.47
--- src/sys/dev/pci/ixgbe/ixgbe.h:1.46 Wed Apr 25 08:46:19 2018
+++ src/sys/dev/pci/ixgbe/ixgbe.h Tue May 8 09:45:54 2018
@@ -1,4 +1,4 @@
-/* $NetBSD: ixgbe.h,v 1.46 2018/04/25 08:46:19 msaitoh Exp $ */
+/* $NetBSD: ixgbe.h,v 1.47 2018/05/08 09:45:54 msaitoh Exp $ */
/******************************************************************************
SPDX-License-Identifier: BSD-3-Clause
@@ -154,7 +154,7 @@
* pass between any two TX clean operations, such only happening
* when the TX hardware is functioning.
*/
-#define IXGBE_WATCHDOG (10 * hz)
+#define IXGBE_TX_TIMEOUT 10
/*
* This parameters control when the driver calls the routine to reclaim
@@ -219,8 +219,6 @@
#define IXGBE_VFTA_SIZE 128
#define IXGBE_BR_SIZE 4096
#define IXGBE_QUEUE_MIN_FREE 32
-#define IXGBE_MAX_TX_BUSY 10
-#define IXGBE_QUEUE_HUNG 0x80000000
#define IXGBE_EITR_DEFAULT 128
@@ -323,7 +321,6 @@ struct ix_queue {
u32 eitr_setting;
u32 me;
struct resource *res;
- int busy;
struct tx_ring *txr;
struct rx_ring *rxr;
struct work wq_cookie;
@@ -353,7 +350,8 @@ struct tx_ring {
kmutex_t tx_mtx;
u32 me;
u32 tail;
- int busy;
+ bool sending; /* enable/disable watchdog */
+ time_t lastsent;
union ixgbe_adv_tx_desc *tx_base;
struct ixgbe_tx_buf *tx_buffers;
struct ixgbe_dma_alloc txdma;
Index: src/sys/dev/pci/ixgbe/ixv.c
diff -u src/sys/dev/pci/ixgbe/ixv.c:1.95 src/sys/dev/pci/ixgbe/ixv.c:1.96
--- src/sys/dev/pci/ixgbe/ixv.c:1.95 Thu Apr 19 07:40:12 2018
+++ src/sys/dev/pci/ixgbe/ixv.c Tue May 8 09:45:54 2018
@@ -1,4 +1,4 @@
-/*$NetBSD: ixv.c,v 1.95 2018/04/19 07:40:12 msaitoh Exp $*/
+/*$NetBSD: ixv.c,v 1.96 2018/05/08 09:45:54 msaitoh Exp $*/
/******************************************************************************
@@ -101,6 +101,8 @@ static int ixv_configure_interrupts
static void ixv_free_pci_resources(struct adapter *);
static void ixv_local_timer(void *);
static void ixv_local_timer_locked(void *);
+static void ixv_watchdog(struct ifnet *);
+static bool ixv_watchdog_txq(struct ifnet *, struct tx_ring *, bool *);
static int ixv_setup_interface(device_t, struct adapter *);
static int ixv_negotiate_api(struct adapter *);
@@ -1189,11 +1191,8 @@ static void
ixv_local_timer_locked(void *arg)
{
struct adapter *adapter = arg;
- device_t dev = adapter->dev;
struct ix_queue *que = adapter->queues;
- u64 queues = 0;
u64 v0, v1, v2, v3, v4, v5, v6, v7;
- int hung = 0;
int i;
KASSERT(mutex_owned(&adapter->core_mtx));
@@ -1227,57 +1226,85 @@ ixv_local_timer_locked(void *arg)
adapter->enomem_tx_dma_setup.ev_count = v6;
adapter->tso_err.ev_count = v7;
- /*
- * Check the TX queues status
- * - mark hung queues so we don't schedule on them
- * - watchdog only if all queues show hung
- */
- que = adapter->queues;
- for (i = 0; i < adapter->num_queues; i++, que++) {
- /* Keep track of queues with work for soft irq */
- if (que->txr->busy)
- queues |= ((u64)1 << que->me);
+ ixv_watchdog(adapter->ifp);
+
+ callout_reset(&adapter->timer, hz, ixv_local_timer, adapter);
+
+ return;
+} /* ixv_local_timer */
+
+static void
+ixv_watchdog(struct ifnet *ifp)
+{
+ struct adapter *adapter = ifp->if_softc;
+ struct ix_queue *que;
+ struct tx_ring *txr;
+ u64 queues = 0;
+ bool hung = false;
+ bool sending = false;
+ int i;
+
+ txr = adapter->tx_rings;
+ for (i = 0; i < adapter->num_queues; i++, txr++) {
+ hung = ixv_watchdog_txq(ifp, txr, &sending);
+ if (hung)
+ break;
+ else if (sending)
+ queues |= ((u64)1 << txr->me);
+ }
+
+ if (hung) {
+ ifp->if_flags &= ~IFF_RUNNING;
+ ifp->if_oerrors++;
+ adapter->watchdog_events.ev_count++;
+ ixv_init_locked(adapter);
+ } else if (queues != 0) {
/*
- * Each time txeof runs without cleaning, but there
- * are uncleaned descriptors it increments busy. If
- * we get to the MAX we declare it hung.
+ * Force an IRQ on queues with work
+ *
+ * Calling ixv_rearm_queues() might not required. I've never
+ * seen any device timeout on ixv(4).
*/
- if (que->busy == IXGBE_QUEUE_HUNG) {
- ++hung;
- /* Mark the queue as inactive */
- adapter->active_queues &= ~((u64)1 << que->me);
- continue;
- } else {
- /* Check if we've come back from hung */
- if ((adapter->active_queues & ((u64)1 << que->me)) == 0)
- adapter->active_queues |= ((u64)1 << que->me);
+ que = adapter->queues;
+ for (i = 0; i < adapter->num_queues; i++, que++) {
+ u64 index = queues & ((u64)1 << i);
+
+ mutex_enter(&que->dc_mtx);
+ if ((index != 0) && (que->disabled_count == 0))
+ ixv_rearm_queues(adapter, index);
+ mutex_exit(&que->dc_mtx);
}
- if (que->busy >= IXGBE_MAX_TX_BUSY) {
- device_printf(dev,
- "Warning queue %d appears to be hung!\n", i);
- que->txr->busy = IXGBE_QUEUE_HUNG;
- ++hung;
- }
- }
-
- /* Only truly watchdog if all queues show hung */
- if (hung == adapter->num_queues)
- goto watchdog;
- else if (queues != 0) { /* Force an IRQ on queues with work */
- ixv_rearm_queues(adapter, queues);
}
+}
- callout_reset(&adapter->timer, hz, ixv_local_timer, adapter);
-
- return;
+static bool
+ixv_watchdog_txq(struct ifnet *ifp, struct tx_ring *txr, bool *sending)
+{
+ struct adapter *adapter = ifp->if_softc;
+ device_t dev = adapter->dev;
+ bool hung = false;
+ bool more = false;
-watchdog:
+ IXGBE_TX_LOCK(txr);
+ *sending = txr->sending;
+ if (*sending && ((time_uptime - txr->lastsent) > IXGBE_TX_TIMEOUT)) {
+ /*
+ * Since we're using delayed interrupts, sweep up before we
+ * report an error.
+ */
+ do {
+ more = ixgbe_txeof(txr);
+ } while (more);
+ hung = true;
+ device_printf(dev,
+ "Watchdog timeout (queue %d%s)-- resetting\n", txr->me,
+ (txr->tx_avail == txr->num_desc)
+ ? ", lost interrupt?" : "");
+ }
+ IXGBE_TX_UNLOCK(txr);
- device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
- adapter->ifp->if_flags &= ~IFF_RUNNING;
- adapter->watchdog_events.ev_count++;
- ixv_init_locked(adapter);
-} /* ixv_local_timer */
+ return hung;
+}
/************************************************************************
* ixv_update_link_status - Update OS on link state
@@ -1361,6 +1388,8 @@ ixv_stop(void *arg)
struct ifnet *ifp;
struct adapter *adapter = arg;
struct ixgbe_hw *hw = &adapter->hw;
+ struct tx_ring *txr;
+ int i;
ifp = adapter->ifp;
@@ -1368,6 +1397,14 @@ ixv_stop(void *arg)
INIT_DEBUGOUT("ixv_stop: begin\n");
ixv_disable_intr(adapter);
+ callout_stop(&adapter->timer);
+
+ txr = adapter->tx_rings;
+ for (i = 0; i < adapter->num_queues; i++, txr++) {
+ IXGBE_TX_LOCK(txr);
+ txr->sending = false;
+ IXGBE_TX_UNLOCK(txr);
+ }
/* Tell the stack that the interface is no longer active */
ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
@@ -1375,7 +1412,6 @@ ixv_stop(void *arg)
hw->mac.ops.reset_hw(hw);
adapter->hw.adapter_stopped = FALSE;
hw->mac.ops.stop_adapter(hw);
- callout_stop(&adapter->timer);
/* reprogram the RAR[0] in case user changed it. */
hw->mac.ops.set_rar(hw, 0, hw->mac.addr, 0, IXGBE_RAH_AV);
@@ -2877,7 +2913,7 @@ ixv_allocate_msix(struct adapter *adapte
{
device_t dev = adapter->dev;
struct ix_queue *que = adapter->queues;
- struct tx_ring *txr = adapter->tx_rings;
+ struct tx_ring *txr = adapter->tx_rings;
int error, msix_ctrl, rid, vector = 0;
pci_chipset_tag_t pc;
pcitag_t tag;