Module Name:    src
Committed By:   martin
Date:           Thu Mar  8 12:31:25 UTC 2018

Modified Files:
        src/sys/dev/pci/ixgbe [netbsd-8]: ixgbe.c ixgbe.h ixv.c

Log Message:
Pull up following revision(s) (requested by knakahara in ticket #612):
        sys/dev/pci/ixgbe/ixgbe.c: revision 1.129-1.133
        sys/dev/pci/ixgbe/ixgbe.h: revision 1.34
        sys/dev/pci/ixgbe/ixv.c: revision 1.85,1.86

- Make "Handled queue in softint" and "Requeued in softint" evcnt(9) per queue
  and count them correctly.
- Remove #if 0'ed code.

Don't increment que->req.ev_count in MSI-X interrupt because it's not
reschedule.

Fix another poll mode assumption breaking. Implemented by [email protected], I just 
commit by proxy.
ixgbe_rearm_queues() writes EICS register(s). 82599, X540 and X550
specifications say "Following a write of 1b to any bit in the EICS register
(interrupt cause set), its corresponding bit in the EIMS register is auto
set as well enabling its interrupt." in "Extended Interrupt Auto Mask Enable
(EIAM) Register" section. That is, ixgbe_rearm_queues() causes interrupts
regardless of the status managed by ixgbe_enable_queue()/ixgbe_disable_queue().

That can break poll mode assumption.

In fact, the problem occurs in the following situation
    - CPU#A has high load traffic, in contrast, CPU#B has not so high load 
traffic
    - CPU#A is occurred interrupt by its NIC queue
      - CPU#A calls ixgbe_disable_queue() in interrupt handler(ixgbe_msix_que())
      - CPU#A kick softint handler(ixgbe_handle_que())
        - CPU#A begins softint
        - CPU#A's NIC queue is set que->txr->busy flag
        - With some reason, CPU#A can do ixg interrupt handler
          E.g. when one of CPU#A's softnet handlers sleeps, ipl is lowered
    - CPU#B starts callout
      - CPU#B calls ixgbe_local_timer1()
        - CPU#B writes EICS bit corresponding CPU#A's NIC queue bit
    - CPU#A's NIC queue causes interrupt whie CPU#A is running in poll mode
      - CPU#A calls ixgbe_disable_queue() in interrupt handler *again*
    - CPU#A has done polling, and then CPU#A calls ixgbe_enable_queue() *once*
    - CPU#A's NIC queue interrupt is disabled until ixg is detached as
      ixgbe_disable_queue() is called twice though ixgbe_disable_queue() is
      called once only
NOTE:
82598 does not say so, but it is treated in the same way because of no harm.
By the way, we will refactor ixgbe_local_timer(watchdog processing) later.

Fix INTx/MSI handler did not schedule workqueue. Pointed out by [email protected].

Reduce duplicated code which schedule deferred packet processing. No functional 
change.


To generate a diff of this commit:
cvs rdiff -u -r1.88.2.12 -r1.88.2.13 src/sys/dev/pci/ixgbe/ixgbe.c
cvs rdiff -u -r1.24.6.5 -r1.24.6.6 src/sys/dev/pci/ixgbe/ixgbe.h
cvs rdiff -u -r1.56.2.9 -r1.56.2.10 src/sys/dev/pci/ixgbe/ixv.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/dev/pci/ixgbe/ixgbe.c
diff -u src/sys/dev/pci/ixgbe/ixgbe.c:1.88.2.12 src/sys/dev/pci/ixgbe/ixgbe.c:1.88.2.13
--- src/sys/dev/pci/ixgbe/ixgbe.c:1.88.2.12	Tue Mar  6 11:12:41 2018
+++ src/sys/dev/pci/ixgbe/ixgbe.c	Thu Mar  8 12:31:25 2018
@@ -1,4 +1,4 @@
-/* $NetBSD: ixgbe.c,v 1.88.2.12 2018/03/06 11:12:41 martin Exp $ */
+/* $NetBSD: ixgbe.c,v 1.88.2.13 2018/03/08 12:31:25 martin Exp $ */
 
 /******************************************************************************
 
@@ -1686,10 +1686,6 @@ ixgbe_add_hw_stats(struct adapter *adapt
 	const char *xname = device_xname(dev);
 
 	/* Driver Statistics */
-	evcnt_attach_dynamic(&adapter->handleq, EVCNT_TYPE_MISC,
-	    NULL, xname, "Handled queue in softint");
-	evcnt_attach_dynamic(&adapter->req, EVCNT_TYPE_MISC,
-	    NULL, xname, "Requeued in softint");
 	evcnt_attach_dynamic(&adapter->efbig_tx_dma_setup, EVCNT_TYPE_MISC,
 	    NULL, xname, "Driver tx dma soft fail EFBIG");
 	evcnt_attach_dynamic(&adapter->mbuf_defrag_failed, EVCNT_TYPE_MISC,
@@ -1736,15 +1732,6 @@ ixgbe_add_hw_stats(struct adapter *adapt
 		    (void *)&adapter->queues[i], 0, CTL_CREATE, CTL_EOL) != 0)
 			break;
 
-#if 0 /* XXX msaitoh */
-		if (sysctl_createv(log, 0, &rnode, &cnode,
-		    CTLFLAG_READONLY, CTLTYPE_QUAD,
-		    "irqs", SYSCTL_DESCR("irqs on this queue"),
-			NULL, 0, &(adapter->queues[i].irqs),
-		    0, CTL_CREATE, CTL_EOL) != 0)
-			break;
-#endif
-
 		if (sysctl_createv(log, 0, &rnode, &cnode,
 		    CTLFLAG_READONLY, CTLTYPE_INT,
 		    "txd_head", SYSCTL_DESCR("Transmit Descriptor Head"),
@@ -1761,6 +1748,11 @@ ixgbe_add_hw_stats(struct adapter *adapt
 
 		evcnt_attach_dynamic(&adapter->queues[i].irqs, EVCNT_TYPE_INTR,
 		    NULL, adapter->queues[i].evnamebuf, "IRQs on queue");
+		evcnt_attach_dynamic(&adapter->queues[i].handleq,
+		    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
+		    "Handled queue in softint");
+		evcnt_attach_dynamic(&adapter->queues[i].req, EVCNT_TYPE_MISC,
+		    NULL, adapter->queues[i].evnamebuf, "Requeued in softint");
 		evcnt_attach_dynamic(&txr->tso_tx, EVCNT_TYPE_MISC,
 		    NULL, adapter->queues[i].evnamebuf, "TSO");
 		evcnt_attach_dynamic(&txr->no_desc_avail, EVCNT_TYPE_MISC,
@@ -1981,8 +1973,6 @@ ixgbe_clear_evcnt(struct adapter *adapte
 	struct ixgbe_hw *hw = &adapter->hw;
 	struct ixgbe_hw_stats *stats = &adapter->stats.pf;
 
-	adapter->handleq.ev_count = 0;
-	adapter->req.ev_count = 0;
 	adapter->efbig_tx_dma_setup.ev_count = 0;
 	adapter->mbuf_defrag_failed.ev_count = 0;
 	adapter->efbig2_tx_dma_setup.ev_count = 0;
@@ -1997,6 +1987,8 @@ ixgbe_clear_evcnt(struct adapter *adapte
 	txr = adapter->tx_rings;
 	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
 		adapter->queues[i].irqs.ev_count = 0;
+		adapter->queues[i].handleq.ev_count = 0;
+		adapter->queues[i].req.ev_count = 0;
 		txr->no_desc_avail.ev_count = 0;
 		txr->total_packets.ev_count = 0;
 		txr->tso_tx.ev_count = 0;
@@ -2447,6 +2439,36 @@ out:
 } /* ixgbe_disable_queue */
 
 /************************************************************************
+ * ixgbe_sched_handle_que - schedule deferred packet processing
+ ************************************************************************/
+static inline void
+ixgbe_sched_handle_que(struct adapter *adapter, struct ix_queue *que)
+{
+
+	if (adapter->txrx_use_workqueue) {
+		/*
+		 * adapter->que_wq is bound to each CPU instead of
+		 * each NIC queue to reduce workqueue kthread. As we
+		 * should consider about interrupt affinity in this
+		 * function, the workqueue kthread must be WQ_PERCPU.
+		 * If create WQ_PERCPU workqueue kthread for each NIC
+		 * queue, that number of created workqueue kthread is
+		 * (number of used NIC queue) * (number of CPUs) =
+		 * (number of CPUs) ^ 2 most often.
+		 *
+		 * The same NIC queue's interrupts are avoided by
+		 * masking the queue's interrupt. And different
+		 * NIC queue's interrupts use different struct work
+		 * (que->wq_cookie). So, "enqueued flag" to avoid
+		 * twice workqueue_enqueue() is not required .
+		 */
+		workqueue_enqueue(adapter->que_wq, &que->wq_cookie, curcpu());
+	} else {
+		softint_schedule(que->que_si);
+	}
+}
+
+/************************************************************************
  * ixgbe_msix_que - MSI-X Queue Interrupt Service routine
  ************************************************************************/
 static int
@@ -2534,30 +2556,9 @@ ixgbe_msix_que(void *arg)
 	rxr->packets = 0;
 
 no_calc:
-	if (more) {
-		if (adapter->txrx_use_workqueue) {
-			/*
-			 * adapter->que_wq is bound to each CPU instead of
-			 * each NIC queue to reduce workqueue kthread. As we
-			 * should consider about interrupt affinity in this
-			 * function, the workqueue kthread must be WQ_PERCPU.
-			 * If create WQ_PERCPU workqueue kthread for each NIC
-			 * queue, that number of created workqueue kthread is
-			 * (number of used NIC queue) * (number of CPUs) =
-			 * (number of CPUs) ^ 2 most often.
-			 *
-			 * The same NIC queue's interrupts are avoided by
-			 * masking the queue's interrupt. And different
-			 * NIC queue's interrupts use different struct work
-			 * (que->wq_cookie). So, "enqueued flag" to avoid
-			 * twice workqueue_enqueue() is not required .
-			 */
-			workqueue_enqueue(adapter->que_wq, &que->wq_cookie,
-			    curcpu());
-		} else {
-			softint_schedule(que->que_si);
-		}
-	} else
+	if (more)
+		ixgbe_sched_handle_que(adapter, que);
+	else
 		ixgbe_enable_queue(adapter, que->msix);
 
 	return 1;
@@ -3376,8 +3377,6 @@ ixgbe_detach(device_t dev, int flags)
 	if_percpuq_destroy(adapter->ipq);
 
 	sysctl_teardown(&adapter->sysctllog);
-	evcnt_detach(&adapter->handleq);
-	evcnt_detach(&adapter->req);
 	evcnt_detach(&adapter->efbig_tx_dma_setup);
 	evcnt_detach(&adapter->mbuf_defrag_failed);
 	evcnt_detach(&adapter->efbig2_tx_dma_setup);
@@ -3392,6 +3391,8 @@ ixgbe_detach(device_t dev, int flags)
 	txr = adapter->tx_rings;
 	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
 		evcnt_detach(&adapter->queues[i].irqs);
+		evcnt_detach(&adapter->queues[i].handleq);
+		evcnt_detach(&adapter->queues[i].req);
 		evcnt_detach(&txr->no_desc_avail);
 		evcnt_detach(&txr->total_packets);
 		evcnt_detach(&txr->tso_tx);
@@ -4237,7 +4238,14 @@ ixgbe_local_timer1(void *arg)
 	if (hung == adapter->num_queues)
 		goto watchdog;
 	else if (queues != 0) { /* Force an IRQ on queues with work */
-		ixgbe_rearm_queues(adapter, queues);
+		que = adapter->queues;
+		for (int i = 0; i < adapter->num_queues; i++, que++) {
+			mutex_enter(&que->im_mtx);
+			if (que->im_nest == 0)
+				ixgbe_rearm_queues(adapter,
+				    queues & ((u64)1 << i));
+			mutex_exit(&que->im_mtx);
+		}
 	}
 
 out:
@@ -4724,9 +4732,10 @@ ixgbe_legacy_irq(void *arg)
 	    (eicr & IXGBE_EICR_GPI_SDP0_X540))
 		softint_schedule(adapter->phy_si);
 
-	if (more)
-		softint_schedule(que->que_si);
-	else
+	if (more) {
+		que->req.ev_count++;
+		ixgbe_sched_handle_que(adapter, que);
+	} else
 		ixgbe_enable_intr(adapter);
 
 	return 1;
@@ -5825,7 +5834,7 @@ ixgbe_handle_que(void *context)
 	struct ifnet    *ifp = adapter->ifp;
 	bool		more = false;
 
-	adapter->handleq.ev_count++;
+	que->handleq.ev_count++;
 
 	if (ifp->if_flags & IFF_RUNNING) {
 		more = ixgbe_rxeof(que);
@@ -5843,16 +5852,8 @@ ixgbe_handle_que(void *context)
 	}
 
 	if (more) {
-		if (adapter->txrx_use_workqueue) {
-			/*
-			 * "enqueued flag" is not required here.
-			 * See ixgbe_msix_que().
-			 */
-			workqueue_enqueue(adapter->que_wq, &que->wq_cookie,
-			    curcpu());
-		} else {
-			softint_schedule(que->que_si);
-		}
+		que->req.ev_count++;
+		ixgbe_sched_handle_que(adapter, que);
 	} else if (que->res != NULL) {
 		/* Re-enable this interrupt */
 		ixgbe_enable_queue(adapter, que->msix);

Index: src/sys/dev/pci/ixgbe/ixgbe.h
diff -u src/sys/dev/pci/ixgbe/ixgbe.h:1.24.6.5 src/sys/dev/pci/ixgbe/ixgbe.h:1.24.6.6
--- src/sys/dev/pci/ixgbe/ixgbe.h:1.24.6.5	Tue Mar  6 11:12:40 2018
+++ src/sys/dev/pci/ixgbe/ixgbe.h	Thu Mar  8 12:31:25 2018
@@ -1,4 +1,4 @@
-/* $NetBSD: ixgbe.h,v 1.24.6.5 2018/03/06 11:12:40 martin Exp $ */
+/* $NetBSD: ixgbe.h,v 1.24.6.6 2018/03/08 12:31:25 martin Exp $ */
 
 /******************************************************************************
   SPDX-License-Identifier: BSD-3-Clause
@@ -335,6 +335,8 @@ struct ix_queue {
 	struct work      wq_cookie;
 	void             *que_si;
 	struct evcnt     irqs;
+	struct evcnt     handleq;
+	struct evcnt     req;
 	char             namebuf[32];
 	char             evnamebuf[32];
 
@@ -566,8 +568,6 @@ struct adapter {
 	struct evcnt	   	tso_err;
 	struct evcnt	   	watchdog_events;
 	struct evcnt		link_irq;
-	struct evcnt		handleq;
-	struct evcnt		req;
 
 	union {
 		struct ixgbe_hw_stats pf;

Index: src/sys/dev/pci/ixgbe/ixv.c
diff -u src/sys/dev/pci/ixgbe/ixv.c:1.56.2.9 src/sys/dev/pci/ixgbe/ixv.c:1.56.2.10
--- src/sys/dev/pci/ixgbe/ixv.c:1.56.2.9	Tue Mar  6 11:12:41 2018
+++ src/sys/dev/pci/ixgbe/ixv.c	Thu Mar  8 12:31:25 2018
@@ -1,4 +1,4 @@
-/*$NetBSD: ixv.c,v 1.56.2.9 2018/03/06 11:12:41 martin Exp $*/
+/*$NetBSD: ixv.c,v 1.56.2.10 2018/03/08 12:31:25 martin Exp $*/
 
 /******************************************************************************
 
@@ -647,8 +647,6 @@ ixv_detach(device_t dev, int flags)
 	if_percpuq_destroy(adapter->ipq);
 
 	sysctl_teardown(&adapter->sysctllog);
-	evcnt_detach(&adapter->handleq);
-	evcnt_detach(&adapter->req);
 	evcnt_detach(&adapter->efbig_tx_dma_setup);
 	evcnt_detach(&adapter->mbuf_defrag_failed);
 	evcnt_detach(&adapter->efbig2_tx_dma_setup);
@@ -663,6 +661,8 @@ ixv_detach(device_t dev, int flags)
 	txr = adapter->tx_rings;
 	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
 		evcnt_detach(&adapter->queues[i].irqs);
+		evcnt_detach(&adapter->queues[i].handleq);
+		evcnt_detach(&adapter->queues[i].req);
 		evcnt_detach(&txr->no_desc_avail);
 		evcnt_detach(&txr->total_packets);
 		evcnt_detach(&txr->tso_tx);
@@ -2340,10 +2340,6 @@ ixv_add_stats_sysctls(struct adapter *ad
 	const char *xname = device_xname(dev);
 
 	/* Driver Statistics */
-	evcnt_attach_dynamic(&adapter->handleq, EVCNT_TYPE_MISC,
-	    NULL, xname, "Handled queue in softint");
-	evcnt_attach_dynamic(&adapter->req, EVCNT_TYPE_MISC,
-	    NULL, xname, "Requeued in softint");
 	evcnt_attach_dynamic(&adapter->efbig_tx_dma_setup, EVCNT_TYPE_MISC,
 	    NULL, xname, "Driver tx dma soft fail EFBIG");
 	evcnt_attach_dynamic(&adapter->mbuf_defrag_failed, EVCNT_TYPE_MISC,
@@ -2390,15 +2386,6 @@ ixv_add_stats_sysctls(struct adapter *ad
 		    (void *)&adapter->queues[i], 0, CTL_CREATE, CTL_EOL) != 0)
 			break;
 
-#if 0
-		if (sysctl_createv(log, 0, &rnode, &cnode,
-		    CTLFLAG_READONLY, CTLTYPE_QUAD,
-		    "irqs", SYSCTL_DESCR("irqs on this queue"),
-			NULL, 0, &(adapter->queues[i].irqs),
-		    0, CTL_CREATE, CTL_EOL) != 0)
-			break;
-#endif
-
 		if (sysctl_createv(log, 0, &rnode, &cnode,
 		    CTLFLAG_READONLY, CTLTYPE_INT,
 		    "txd_head", SYSCTL_DESCR("Transmit Descriptor Head"),
@@ -2415,6 +2402,11 @@ ixv_add_stats_sysctls(struct adapter *ad
 
 		evcnt_attach_dynamic(&adapter->queues[i].irqs, EVCNT_TYPE_INTR,
 		    NULL, adapter->queues[i].evnamebuf, "IRQs on queue");
+		evcnt_attach_dynamic(&adapter->queues[i].handleq,
+		    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
+		    "Handled queue in softint");
+		evcnt_attach_dynamic(&adapter->queues[i].req, EVCNT_TYPE_MISC,
+		    NULL, adapter->queues[i].evnamebuf, "Requeued in softint");
 		evcnt_attach_dynamic(&txr->tso_tx, EVCNT_TYPE_MISC,
 		    NULL, adapter->queues[i].evnamebuf, "TSO");
 		evcnt_attach_dynamic(&txr->no_desc_avail, EVCNT_TYPE_MISC,
@@ -2800,7 +2792,7 @@ ixv_handle_que(void *context)
 	struct ifnet    *ifp = adapter->ifp;
 	bool		more;
 
-	adapter->handleq.ev_count++;
+	que->handleq.ev_count++;
 
 	if (ifp->if_flags & IFF_RUNNING) {
 		more = ixgbe_rxeof(que);
@@ -2816,7 +2808,7 @@ ixv_handle_que(void *context)
 			ixgbe_legacy_start_locked(ifp, txr);
 		IXGBE_TX_UNLOCK(txr);
 		if (more) {
-			adapter->req.ev_count++;
+			que->req.ev_count++;
 			if (adapter->txrx_use_workqueue) {
 				/*
 				 * "enqueued flag" is not required here

Reply via email to