Module Name: src
Committed By: martin
Date: Thu Mar 8 12:31:25 UTC 2018
Modified Files:
src/sys/dev/pci/ixgbe [netbsd-8]: ixgbe.c ixgbe.h ixv.c
Log Message:
Pull up following revision(s) (requested by knakahara in ticket #612):
sys/dev/pci/ixgbe/ixgbe.c: revision 1.129-1.133
sys/dev/pci/ixgbe/ixgbe.h: revision 1.34
sys/dev/pci/ixgbe/ixv.c: revision 1.85,1.86
- Make "Handled queue in softint" and "Requeued in softint" evcnt(9) per queue
and count them correctly.
- Remove #if 0'ed code.
Don't increment que->req.ev_count in MSI-X interrupt because it's not a
reschedule.
Fix another poll mode assumption breaking. Implemented by [email protected], I just
commit by proxy.
ixgbe_rearm_queues() writes EICS register(s). 82599, X540 and X550
specifications say "Following a write of 1b to any bit in the EICS register
(interrupt cause set), its corresponding bit in the EIMS register is auto
set as well enabling its interrupt." in "Extended Interrupt Auto Mask Enable
(EIAM) Register" section. That is, ixgbe_rearm_queues() causes interrupts
regardless of the status managed by ixgbe_enable_queue()/ixgbe_disable_queue().
That can break poll mode assumption.
In fact, the problem occurs in the following situation
- CPU#A has high load traffic, in contrast, CPU#B has not so high load
traffic
- CPU#A is interrupted by its NIC queue
- CPU#A calls ixgbe_disable_queue() in interrupt handler(ixgbe_msix_que())
- CPU#A kicks softint handler(ixgbe_handle_que())
- CPU#A begins softint
- CPU#A's NIC queue has its que->txr->busy flag set
- For some reason, CPU#A can run the ixg interrupt handler
E.g. when one of CPU#A's softnet handlers sleeps, ipl is lowered
- CPU#B starts callout
- CPU#B calls ixgbe_local_timer1()
- CPU#B writes the EICS bit corresponding to CPU#A's NIC queue
- CPU#A's NIC queue causes interrupt while CPU#A is running in poll mode
- CPU#A calls ixgbe_disable_queue() in interrupt handler *again*
- CPU#A has done polling, and then CPU#A calls ixgbe_enable_queue() *once*
- CPU#A's NIC queue interrupt is disabled until ixg is detached as
ixgbe_disable_queue() is called twice though ixgbe_enable_queue() is
called once only
NOTE:
The 82598 specification does not say so, but it is treated in the same way because doing so does no harm.
By the way, we will refactor ixgbe_local_timer(watchdog processing) later.
Fix that the INTx/MSI handler did not schedule the workqueue. Pointed out by [email protected].
Reduce duplicated code which schedules deferred packet processing. No functional
change.
To generate a diff of this commit:
cvs rdiff -u -r1.88.2.12 -r1.88.2.13 src/sys/dev/pci/ixgbe/ixgbe.c
cvs rdiff -u -r1.24.6.5 -r1.24.6.6 src/sys/dev/pci/ixgbe/ixgbe.h
cvs rdiff -u -r1.56.2.9 -r1.56.2.10 src/sys/dev/pci/ixgbe/ixv.c
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/sys/dev/pci/ixgbe/ixgbe.c
diff -u src/sys/dev/pci/ixgbe/ixgbe.c:1.88.2.12 src/sys/dev/pci/ixgbe/ixgbe.c:1.88.2.13
--- src/sys/dev/pci/ixgbe/ixgbe.c:1.88.2.12 Tue Mar 6 11:12:41 2018
+++ src/sys/dev/pci/ixgbe/ixgbe.c Thu Mar 8 12:31:25 2018
@@ -1,4 +1,4 @@
-/* $NetBSD: ixgbe.c,v 1.88.2.12 2018/03/06 11:12:41 martin Exp $ */
+/* $NetBSD: ixgbe.c,v 1.88.2.13 2018/03/08 12:31:25 martin Exp $ */
/******************************************************************************
@@ -1686,10 +1686,6 @@ ixgbe_add_hw_stats(struct adapter *adapt
const char *xname = device_xname(dev);
/* Driver Statistics */
- evcnt_attach_dynamic(&adapter->handleq, EVCNT_TYPE_MISC,
- NULL, xname, "Handled queue in softint");
- evcnt_attach_dynamic(&adapter->req, EVCNT_TYPE_MISC,
- NULL, xname, "Requeued in softint");
evcnt_attach_dynamic(&adapter->efbig_tx_dma_setup, EVCNT_TYPE_MISC,
NULL, xname, "Driver tx dma soft fail EFBIG");
evcnt_attach_dynamic(&adapter->mbuf_defrag_failed, EVCNT_TYPE_MISC,
@@ -1736,15 +1732,6 @@ ixgbe_add_hw_stats(struct adapter *adapt
(void *)&adapter->queues[i], 0, CTL_CREATE, CTL_EOL) != 0)
break;
-#if 0 /* XXX msaitoh */
- if (sysctl_createv(log, 0, &rnode, &cnode,
- CTLFLAG_READONLY, CTLTYPE_QUAD,
- "irqs", SYSCTL_DESCR("irqs on this queue"),
- NULL, 0, &(adapter->queues[i].irqs),
- 0, CTL_CREATE, CTL_EOL) != 0)
- break;
-#endif
-
if (sysctl_createv(log, 0, &rnode, &cnode,
CTLFLAG_READONLY, CTLTYPE_INT,
"txd_head", SYSCTL_DESCR("Transmit Descriptor Head"),
@@ -1761,6 +1748,11 @@ ixgbe_add_hw_stats(struct adapter *adapt
evcnt_attach_dynamic(&adapter->queues[i].irqs, EVCNT_TYPE_INTR,
NULL, adapter->queues[i].evnamebuf, "IRQs on queue");
+ evcnt_attach_dynamic(&adapter->queues[i].handleq,
+ EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
+ "Handled queue in softint");
+ evcnt_attach_dynamic(&adapter->queues[i].req, EVCNT_TYPE_MISC,
+ NULL, adapter->queues[i].evnamebuf, "Requeued in softint");
evcnt_attach_dynamic(&txr->tso_tx, EVCNT_TYPE_MISC,
NULL, adapter->queues[i].evnamebuf, "TSO");
evcnt_attach_dynamic(&txr->no_desc_avail, EVCNT_TYPE_MISC,
@@ -1981,8 +1973,6 @@ ixgbe_clear_evcnt(struct adapter *adapte
struct ixgbe_hw *hw = &adapter->hw;
struct ixgbe_hw_stats *stats = &adapter->stats.pf;
- adapter->handleq.ev_count = 0;
- adapter->req.ev_count = 0;
adapter->efbig_tx_dma_setup.ev_count = 0;
adapter->mbuf_defrag_failed.ev_count = 0;
adapter->efbig2_tx_dma_setup.ev_count = 0;
@@ -1997,6 +1987,8 @@ ixgbe_clear_evcnt(struct adapter *adapte
txr = adapter->tx_rings;
for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
adapter->queues[i].irqs.ev_count = 0;
+ adapter->queues[i].handleq.ev_count = 0;
+ adapter->queues[i].req.ev_count = 0;
txr->no_desc_avail.ev_count = 0;
txr->total_packets.ev_count = 0;
txr->tso_tx.ev_count = 0;
@@ -2447,6 +2439,36 @@ out:
} /* ixgbe_disable_queue */
/************************************************************************
+ * ixgbe_sched_handle_que - schedule deferred packet processing
+ ************************************************************************/
+static inline void
+ixgbe_sched_handle_que(struct adapter *adapter, struct ix_queue *que)
+{
+
+ if (adapter->txrx_use_workqueue) {
+ /*
+ * adapter->que_wq is bound to each CPU instead of
+ * each NIC queue to reduce workqueue kthread. As we
+ * should consider about interrupt affinity in this
+ * function, the workqueue kthread must be WQ_PERCPU.
+ * If create WQ_PERCPU workqueue kthread for each NIC
+ * queue, that number of created workqueue kthread is
+ * (number of used NIC queue) * (number of CPUs) =
+ * (number of CPUs) ^ 2 most often.
+ *
+ * The same NIC queue's interrupts are avoided by
+ * masking the queue's interrupt. And different
+ * NIC queue's interrupts use different struct work
+ * (que->wq_cookie). So, "enqueued flag" to avoid
+ * twice workqueue_enqueue() is not required .
+ */
+ workqueue_enqueue(adapter->que_wq, &que->wq_cookie, curcpu());
+ } else {
+ softint_schedule(que->que_si);
+ }
+}
+
+/************************************************************************
* ixgbe_msix_que - MSI-X Queue Interrupt Service routine
************************************************************************/
static int
@@ -2534,30 +2556,9 @@ ixgbe_msix_que(void *arg)
rxr->packets = 0;
no_calc:
- if (more) {
- if (adapter->txrx_use_workqueue) {
- /*
- * adapter->que_wq is bound to each CPU instead of
- * each NIC queue to reduce workqueue kthread. As we
- * should consider about interrupt affinity in this
- * function, the workqueue kthread must be WQ_PERCPU.
- * If create WQ_PERCPU workqueue kthread for each NIC
- * queue, that number of created workqueue kthread is
- * (number of used NIC queue) * (number of CPUs) =
- * (number of CPUs) ^ 2 most often.
- *
- * The same NIC queue's interrupts are avoided by
- * masking the queue's interrupt. And different
- * NIC queue's interrupts use different struct work
- * (que->wq_cookie). So, "enqueued flag" to avoid
- * twice workqueue_enqueue() is not required .
- */
- workqueue_enqueue(adapter->que_wq, &que->wq_cookie,
- curcpu());
- } else {
- softint_schedule(que->que_si);
- }
- } else
+ if (more)
+ ixgbe_sched_handle_que(adapter, que);
+ else
ixgbe_enable_queue(adapter, que->msix);
return 1;
@@ -3376,8 +3377,6 @@ ixgbe_detach(device_t dev, int flags)
if_percpuq_destroy(adapter->ipq);
sysctl_teardown(&adapter->sysctllog);
- evcnt_detach(&adapter->handleq);
- evcnt_detach(&adapter->req);
evcnt_detach(&adapter->efbig_tx_dma_setup);
evcnt_detach(&adapter->mbuf_defrag_failed);
evcnt_detach(&adapter->efbig2_tx_dma_setup);
@@ -3392,6 +3391,8 @@ ixgbe_detach(device_t dev, int flags)
txr = adapter->tx_rings;
for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
evcnt_detach(&adapter->queues[i].irqs);
+ evcnt_detach(&adapter->queues[i].handleq);
+ evcnt_detach(&adapter->queues[i].req);
evcnt_detach(&txr->no_desc_avail);
evcnt_detach(&txr->total_packets);
evcnt_detach(&txr->tso_tx);
@@ -4237,7 +4238,14 @@ ixgbe_local_timer1(void *arg)
if (hung == adapter->num_queues)
goto watchdog;
else if (queues != 0) { /* Force an IRQ on queues with work */
- ixgbe_rearm_queues(adapter, queues);
+ que = adapter->queues;
+ for (int i = 0; i < adapter->num_queues; i++, que++) {
+ mutex_enter(&que->im_mtx);
+ if (que->im_nest == 0)
+ ixgbe_rearm_queues(adapter,
+ queues & ((u64)1 << i));
+ mutex_exit(&que->im_mtx);
+ }
}
out:
@@ -4724,9 +4732,10 @@ ixgbe_legacy_irq(void *arg)
(eicr & IXGBE_EICR_GPI_SDP0_X540))
softint_schedule(adapter->phy_si);
- if (more)
- softint_schedule(que->que_si);
- else
+ if (more) {
+ que->req.ev_count++;
+ ixgbe_sched_handle_que(adapter, que);
+ } else
ixgbe_enable_intr(adapter);
return 1;
@@ -5825,7 +5834,7 @@ ixgbe_handle_que(void *context)
struct ifnet *ifp = adapter->ifp;
bool more = false;
- adapter->handleq.ev_count++;
+ que->handleq.ev_count++;
if (ifp->if_flags & IFF_RUNNING) {
more = ixgbe_rxeof(que);
@@ -5843,16 +5852,8 @@ ixgbe_handle_que(void *context)
}
if (more) {
- if (adapter->txrx_use_workqueue) {
- /*
- * "enqueued flag" is not required here.
- * See ixgbe_msix_que().
- */
- workqueue_enqueue(adapter->que_wq, &que->wq_cookie,
- curcpu());
- } else {
- softint_schedule(que->que_si);
- }
+ que->req.ev_count++;
+ ixgbe_sched_handle_que(adapter, que);
} else if (que->res != NULL) {
/* Re-enable this interrupt */
ixgbe_enable_queue(adapter, que->msix);
Index: src/sys/dev/pci/ixgbe/ixgbe.h
diff -u src/sys/dev/pci/ixgbe/ixgbe.h:1.24.6.5 src/sys/dev/pci/ixgbe/ixgbe.h:1.24.6.6
--- src/sys/dev/pci/ixgbe/ixgbe.h:1.24.6.5 Tue Mar 6 11:12:40 2018
+++ src/sys/dev/pci/ixgbe/ixgbe.h Thu Mar 8 12:31:25 2018
@@ -1,4 +1,4 @@
-/* $NetBSD: ixgbe.h,v 1.24.6.5 2018/03/06 11:12:40 martin Exp $ */
+/* $NetBSD: ixgbe.h,v 1.24.6.6 2018/03/08 12:31:25 martin Exp $ */
/******************************************************************************
SPDX-License-Identifier: BSD-3-Clause
@@ -335,6 +335,8 @@ struct ix_queue {
struct work wq_cookie;
void *que_si;
struct evcnt irqs;
+ struct evcnt handleq;
+ struct evcnt req;
char namebuf[32];
char evnamebuf[32];
@@ -566,8 +568,6 @@ struct adapter {
struct evcnt tso_err;
struct evcnt watchdog_events;
struct evcnt link_irq;
- struct evcnt handleq;
- struct evcnt req;
union {
struct ixgbe_hw_stats pf;
Index: src/sys/dev/pci/ixgbe/ixv.c
diff -u src/sys/dev/pci/ixgbe/ixv.c:1.56.2.9 src/sys/dev/pci/ixgbe/ixv.c:1.56.2.10
--- src/sys/dev/pci/ixgbe/ixv.c:1.56.2.9 Tue Mar 6 11:12:41 2018
+++ src/sys/dev/pci/ixgbe/ixv.c Thu Mar 8 12:31:25 2018
@@ -1,4 +1,4 @@
-/*$NetBSD: ixv.c,v 1.56.2.9 2018/03/06 11:12:41 martin Exp $*/
+/*$NetBSD: ixv.c,v 1.56.2.10 2018/03/08 12:31:25 martin Exp $*/
/******************************************************************************
@@ -647,8 +647,6 @@ ixv_detach(device_t dev, int flags)
if_percpuq_destroy(adapter->ipq);
sysctl_teardown(&adapter->sysctllog);
- evcnt_detach(&adapter->handleq);
- evcnt_detach(&adapter->req);
evcnt_detach(&adapter->efbig_tx_dma_setup);
evcnt_detach(&adapter->mbuf_defrag_failed);
evcnt_detach(&adapter->efbig2_tx_dma_setup);
@@ -663,6 +661,8 @@ ixv_detach(device_t dev, int flags)
txr = adapter->tx_rings;
for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
evcnt_detach(&adapter->queues[i].irqs);
+ evcnt_detach(&adapter->queues[i].handleq);
+ evcnt_detach(&adapter->queues[i].req);
evcnt_detach(&txr->no_desc_avail);
evcnt_detach(&txr->total_packets);
evcnt_detach(&txr->tso_tx);
@@ -2340,10 +2340,6 @@ ixv_add_stats_sysctls(struct adapter *ad
const char *xname = device_xname(dev);
/* Driver Statistics */
- evcnt_attach_dynamic(&adapter->handleq, EVCNT_TYPE_MISC,
- NULL, xname, "Handled queue in softint");
- evcnt_attach_dynamic(&adapter->req, EVCNT_TYPE_MISC,
- NULL, xname, "Requeued in softint");
evcnt_attach_dynamic(&adapter->efbig_tx_dma_setup, EVCNT_TYPE_MISC,
NULL, xname, "Driver tx dma soft fail EFBIG");
evcnt_attach_dynamic(&adapter->mbuf_defrag_failed, EVCNT_TYPE_MISC,
@@ -2390,15 +2386,6 @@ ixv_add_stats_sysctls(struct adapter *ad
(void *)&adapter->queues[i], 0, CTL_CREATE, CTL_EOL) != 0)
break;
-#if 0
- if (sysctl_createv(log, 0, &rnode, &cnode,
- CTLFLAG_READONLY, CTLTYPE_QUAD,
- "irqs", SYSCTL_DESCR("irqs on this queue"),
- NULL, 0, &(adapter->queues[i].irqs),
- 0, CTL_CREATE, CTL_EOL) != 0)
- break;
-#endif
-
if (sysctl_createv(log, 0, &rnode, &cnode,
CTLFLAG_READONLY, CTLTYPE_INT,
"txd_head", SYSCTL_DESCR("Transmit Descriptor Head"),
@@ -2415,6 +2402,11 @@ ixv_add_stats_sysctls(struct adapter *ad
evcnt_attach_dynamic(&adapter->queues[i].irqs, EVCNT_TYPE_INTR,
NULL, adapter->queues[i].evnamebuf, "IRQs on queue");
+ evcnt_attach_dynamic(&adapter->queues[i].handleq,
+ EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
+ "Handled queue in softint");
+ evcnt_attach_dynamic(&adapter->queues[i].req, EVCNT_TYPE_MISC,
+ NULL, adapter->queues[i].evnamebuf, "Requeued in softint");
evcnt_attach_dynamic(&txr->tso_tx, EVCNT_TYPE_MISC,
NULL, adapter->queues[i].evnamebuf, "TSO");
evcnt_attach_dynamic(&txr->no_desc_avail, EVCNT_TYPE_MISC,
@@ -2800,7 +2792,7 @@ ixv_handle_que(void *context)
struct ifnet *ifp = adapter->ifp;
bool more;
- adapter->handleq.ev_count++;
+ que->handleq.ev_count++;
if (ifp->if_flags & IFF_RUNNING) {
more = ixgbe_rxeof(que);
@@ -2816,7 +2808,7 @@ ixv_handle_que(void *context)
ixgbe_legacy_start_locked(ifp, txr);
IXGBE_TX_UNLOCK(txr);
if (more) {
- adapter->req.ev_count++;
+ que->req.ev_count++;
if (adapter->txrx_use_workqueue) {
/*
* "enqueued flag" is not required here