Module Name:    src
Committed By:   msaitoh
Date:           Tue Mar  9 10:03:18 UTC 2021

Modified Files:
        src/share/man/man4: ixg.4 ixv.4
        src/sys/dev/pci: files.pci
        src/sys/dev/pci/ixgbe: ix_txrx.c ixgbe.c ixgbe.h ixgbe_netbsd.h ixv.c

Log Message:
Modify some parameters to reduce packet dropping.

 - Background: ixgbe doesn't use the common MCLGET() interface and uses a
   driver-specific cluster allocation mechanism (jcl). The clusters are
   pre-allocated in a fixed number, and the current number per queue
   is num_rx_desc * 2 (2048*2=4096). That is too small. Another problem is
   that the max length of the pcq which is used in the TX path is large
   (4096). Example:

    100M <----- [ixg0                                      ixg1] <----- 1G
                2048 TX descs <--- 4096 pcqs <---- 2048 RX descs

   If a machine forwards traffic from a 1G interface to a 100M interface,
   it would require 2048+4096+2048=8192 clusters, but the current number
   is 2048*2=4096. That is too small. Even if both interfaces' link speeds
   are the same and only a small number of packets are queued in the pcq,
   4096 jcl is too few because 2048(RX)+2048(TX)=4096. If jcl is exhausted,
   not only are packets forwarded from ixg1 to ixg0 dropped, but packets on
   any other forwarding path from ixg1 to another interface (e.g. wm0) are
   dropped as well. Sockets also queue packets, so if a lot of sockets are
   used and/or the socket buffer size is increased, this also becomes a
   problem. If the jcl is exhausted, the evcnt(9) counter
   "ixgX qY Rx no jumbo mbuf" is incremented.
   Example:
     vmstat -ev | grep ixg1 | grep "no jumbo"
     ixg1 q0 Rx no jumbo mbuf                           0     0 misc
     ixg1 q1 Rx no jumbo mbuf                           0     0 misc
     ixg1 q2 Rx no jumbo mbuf                      141326     0 misc
     ixg1 q3 Rx no jumbo mbuf                           0     0 misc

 - To solve this problem:
   - Add a new config parameter IXGBE_JCLNUM_MULTI and set the default to 3
     (2048 * 3). The minimum value is 2. The total number of jcl per queue
     is available via the hw.ixgN.num_jcl_per_queue sysctl.
   - Reduce the max length of the pcq which is used in the TX path from
     4096 to 2048.

 - Reviewed by knakahara@ and ozaki-r@.

 - TODO: Use MCLGET().


To generate a diff of this commit:
cvs rdiff -u -r1.12 -r1.13 src/share/man/man4/ixg.4
cvs rdiff -u -r1.5 -r1.6 src/share/man/man4/ixv.4
cvs rdiff -u -r1.435 -r1.436 src/sys/dev/pci/files.pci
cvs rdiff -u -r1.66 -r1.67 src/sys/dev/pci/ixgbe/ix_txrx.c
cvs rdiff -u -r1.278 -r1.279 src/sys/dev/pci/ixgbe/ixgbe.c
cvs rdiff -u -r1.74 -r1.75 src/sys/dev/pci/ixgbe/ixgbe.h
cvs rdiff -u -r1.11 -r1.12 src/sys/dev/pci/ixgbe/ixgbe_netbsd.h
cvs rdiff -u -r1.154 -r1.155 src/sys/dev/pci/ixgbe/ixv.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/share/man/man4/ixg.4
diff -u src/share/man/man4/ixg.4:1.12 src/share/man/man4/ixg.4:1.13
--- src/share/man/man4/ixg.4:1.12	Wed May  9 08:01:16 2018
+++ src/share/man/man4/ixg.4	Tue Mar  9 10:03:18 2021
@@ -1,4 +1,4 @@
-.\" $NetBSD: ixg.4,v 1.12 2018/05/09 08:01:16 wiz Exp $
+.\" $NetBSD: ixg.4,v 1.13 2021/03/09 10:03:18 msaitoh Exp $
 .\"
 .\" Copyright (c) 2001-2008, Intel Corporation
 .\" All rights reserved.
@@ -33,7 +33,7 @@
 .\"
 .\" $FreeBSD: src/share/man/man4/ixgbe.4,v 1.3 2010/12/19 23:54:31 yongari Exp $
 .\"
-.Dd May 9, 2018
+.Dd March 9, 2021
 .Dt IXG 4
 .Os
 .Sh NAME
@@ -83,6 +83,24 @@ go to the Intel support website at:
 .\" with a supported adapter, email the specific information related to the
 .\" issue to
 .\" .Aq freebsd...@mailbox.intel.com .
+.Sh OPTIONS
+The
+.Nm
+driver doesn't use the common
+.Xr MCLGET 9
+interface and uses a driver-specific cluster allocation mechanism.
+If the clusters are exhausted, the
+.Xr evcnt 9
+counter "ixgX qY Rx no jumbo mbuf" is incremented.
+If that is observed, the number can be changed by the following config parameter:
+.Bl -tag -width IXGBE_JCLNUM_MULTI -offset 3n
+.It Dv IXGBE_JCLNUM_MULTI
+The number of RX jumbo buffers (clusters) per queue is calculated by
+.Dv IXGBE_JCLNUM_MULTI
+* (number of RX descriptors).
+The total number of clusters per queue is available with
+hw.ixgN.num_jcl_per_queue sysctl.
+.El
 .Sh SEE ALSO
 .Xr arp 4 ,
 .Xr ixv 4 ,

Index: src/share/man/man4/ixv.4
diff -u src/share/man/man4/ixv.4:1.5 src/share/man/man4/ixv.4:1.6
--- src/share/man/man4/ixv.4:1.5	Thu Sep  5 10:01:30 2019
+++ src/share/man/man4/ixv.4	Tue Mar  9 10:03:18 2021
@@ -1,4 +1,4 @@
-.\"	$NetBSD: ixv.4,v 1.5 2019/09/05 10:01:30 msaitoh Exp $
+.\"	$NetBSD: ixv.4,v 1.6 2021/03/09 10:03:18 msaitoh Exp $
 .\"
 .\" Copyright (c) 2018 The NetBSD Foundation, Inc.
 .\" All rights reserved.
@@ -27,7 +27,7 @@
 .\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 .\" POSSIBILITY OF SUCH DAMAGE.
 .\"
-.Dd September 5, 2019
+.Dd March 9, 2021
 .Dt IXV 4
 .Os
 .Sh NAME
@@ -43,6 +43,24 @@ newer chips support.
 It can be used on a
 .Nx
 guest that the host supports SR-IOV.
+.Sh OPTIONS
+The
+.Nm
+driver doesn't use the common
+.Xr MCLGET 9
+interface and uses a driver-specific cluster allocation mechanism.
+If the clusters are exhausted, the
+.Xr evcnt 9
+counter "ixvX qY Rx no jumbo mbuf" is incremented.
+If that is observed, the number can be changed by the following config parameter:
+.Bl -tag -width IXGBE_JCLNUM_MULTI -offset 3n
+.It Dv IXGBE_JCLNUM_MULTI
+The number of RX jumbo buffers (clusters) per queue is calculated by
+.Dv IXGBE_JCLNUM_MULTI
+* (number of RX descriptors).
+The total number of clusters per queue is available with
+hw.ixvN.num_jcl_per_queue sysctl.
+.El
 .Sh SEE ALSO
 .Xr arp 4 ,
 .Xr ixg 4 ,

Index: src/sys/dev/pci/files.pci
diff -u src/sys/dev/pci/files.pci:1.435 src/sys/dev/pci/files.pci:1.436
--- src/sys/dev/pci/files.pci:1.435	Mon Mar  1 17:48:52 2021
+++ src/sys/dev/pci/files.pci	Tue Mar  9 10:03:18 2021
@@ -1,4 +1,4 @@
-#	$NetBSD: files.pci,v 1.435 2021/03/01 17:48:52 jakllsch Exp $
+#	$NetBSD: files.pci,v 1.436 2021/03/09 10:03:18 msaitoh Exp $
 #
 # Config file and device description for machine-independent PCI code.
 # Included by ports that need it.  Requires that the SCSI files be
@@ -664,6 +664,7 @@ file	dev/pci/ixgbe/ixgbe_phy.c	ixg | ixv
 file	dev/pci/ixgbe/ixgbe_vf.c	ixg | ixv
 file	dev/pci/ixgbe/if_bypass.c	ixg | ixv
 file	dev/pci/ixgbe/if_fdir.c		ixg | ixv
+defparam opt_ixgbe.h	IXGBE_JCLNUM_MULTI
 
 # This appears to be the driver for virtual instances of i82599.
 device	ixv: ether, ifnet, arp, mii, mii_phy

Index: src/sys/dev/pci/ixgbe/ix_txrx.c
diff -u src/sys/dev/pci/ixgbe/ix_txrx.c:1.66 src/sys/dev/pci/ixgbe/ix_txrx.c:1.67
--- src/sys/dev/pci/ixgbe/ix_txrx.c:1.66	Mon Mar  8 07:10:45 2021
+++ src/sys/dev/pci/ixgbe/ix_txrx.c	Tue Mar  9 10:03:18 2021
@@ -1,4 +1,4 @@
-/* $NetBSD: ix_txrx.c,v 1.66 2021/03/08 07:10:45 msaitoh Exp $ */
+/* $NetBSD: ix_txrx.c,v 1.67 2021/03/09 10:03:18 msaitoh Exp $ */
 
 /******************************************************************************
 
@@ -1507,7 +1507,7 @@ ixgbe_setup_receive_ring(struct rx_ring 
 	 * Assume all of rxr->ptag are the same.
 	 */
 	ixgbe_jcl_reinit(adapter, rxr->ptag->dt_dmat, rxr,
-	    (2 * adapter->num_rx_desc), adapter->rx_mbuf_sz);
+	    adapter->num_jcl, adapter->rx_mbuf_sz);
 
 	IXGBE_RX_LOCK(rxr);
 

Index: src/sys/dev/pci/ixgbe/ixgbe.c
diff -u src/sys/dev/pci/ixgbe/ixgbe.c:1.278 src/sys/dev/pci/ixgbe/ixgbe.c:1.279
--- src/sys/dev/pci/ixgbe/ixgbe.c:1.278	Thu Jan 14 05:47:35 2021
+++ src/sys/dev/pci/ixgbe/ixgbe.c	Tue Mar  9 10:03:18 2021
@@ -1,4 +1,4 @@
-/* $NetBSD: ixgbe.c,v 1.278 2021/01/14 05:47:35 msaitoh Exp $ */
+/* $NetBSD: ixgbe.c,v 1.279 2021/03/09 10:03:18 msaitoh Exp $ */
 
 /******************************************************************************
 
@@ -67,6 +67,7 @@
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_net_mpsafe.h"
+#include "opt_ixgbe.h"
 #endif
 
 #include "ixgbe.h"
@@ -982,6 +983,8 @@ ixgbe_attach(device_t parent, device_t d
 	} else
 		adapter->num_rx_desc = ixgbe_rxd;
 
+	adapter->num_jcl = adapter->num_rx_desc * IXGBE_JCLNUM_MULTI;
+
 	/* Allocate our TX/RX Queues */
 	if (ixgbe_allocate_queues(adapter)) {
 		error = ENOMEM;
@@ -3366,6 +3369,13 @@ ixgbe_add_device_sysctls(struct adapter 
 		aprint_error_dev(dev, "could not create sysctl\n");
 
 	if (sysctl_createv(log, 0, &rnode, &cnode,
+	    CTLFLAG_READONLY, CTLTYPE_INT, "num_jcl_per_queue",
+	    SYSCTL_DESCR("Number of jumbo buffers per queue"),
+	    NULL, 0, &adapter->num_jcl, 0, CTL_CREATE,
+	    CTL_EOL) != 0)
+		aprint_error_dev(dev, "could not create sysctl\n");
+
+	if (sysctl_createv(log, 0, &rnode, &cnode,
 	    CTLFLAG_READONLY, CTLTYPE_INT,
 	    "num_queues", SYSCTL_DESCR("Number of queues"),
 	    NULL, 0, &adapter->num_queues, 0, CTL_CREATE, CTL_EOL) != 0)

Index: src/sys/dev/pci/ixgbe/ixgbe.h
diff -u src/sys/dev/pci/ixgbe/ixgbe.h:1.74 src/sys/dev/pci/ixgbe/ixgbe.h:1.75
--- src/sys/dev/pci/ixgbe/ixgbe.h:1.74	Sat Dec 26 06:07:16 2020
+++ src/sys/dev/pci/ixgbe/ixgbe.h	Tue Mar  9 10:03:18 2021
@@ -1,4 +1,4 @@
-/* $NetBSD: ixgbe.h,v 1.74 2020/12/26 06:07:16 msaitoh Exp $ */
+/* $NetBSD: ixgbe.h,v 1.75 2021/03/09 10:03:18 msaitoh Exp $ */
 
 /******************************************************************************
   SPDX-License-Identifier: BSD-3-Clause
@@ -216,7 +216,7 @@
 #define IXGBE_TSO_SIZE                  262140
 #define IXGBE_RX_HDR                    128
 #define IXGBE_VFTA_SIZE                 128
-#define IXGBE_BR_SIZE                   4096
+#define IXGBE_BR_SIZE                   2048
 #define IXGBE_QUEUE_MIN_FREE            32
 #define IXGBE_MAX_TX_BUSY               10
 #define IXGBE_QUEUE_HUNG                0x80000000
@@ -568,6 +568,7 @@ struct adapter {
 	u64			active_queues;
 	u32			num_rx_desc;
 	u32			rx_process_limit;
+	int			num_jcl;
 
 	/* Multicast array memory */
 	struct ixgbe_mc_addr	*mta;

Index: src/sys/dev/pci/ixgbe/ixgbe_netbsd.h
diff -u src/sys/dev/pci/ixgbe/ixgbe_netbsd.h:1.11 src/sys/dev/pci/ixgbe/ixgbe_netbsd.h:1.12
--- src/sys/dev/pci/ixgbe/ixgbe_netbsd.h:1.11	Tue Mar  5 08:25:02 2019
+++ src/sys/dev/pci/ixgbe/ixgbe_netbsd.h	Tue Mar  9 10:03:18 2021
@@ -1,4 +1,4 @@
-/*$NetBSD: ixgbe_netbsd.h,v 1.11 2019/03/05 08:25:02 msaitoh Exp $*/
+/*$NetBSD: ixgbe_netbsd.h,v 1.12 2021/03/09 10:03:18 msaitoh Exp $*/
 /*
  * Copyright (c) 2011 The NetBSD Foundation, Inc.
  * All rights reserved.
@@ -40,6 +40,20 @@
 #define	MJUM16BYTES	(16 * 1024)
 #define	MJUMPAGESIZE	PAGE_SIZE
 
+/*
+ * Number of jcl per queue is calculated by
+ * adapter->num_rx_desc * IXGBE_JCLNUM_MULTI. The lower limit is 2.
+ */
+#define	IXGBE_JCLNUM_MULTI_LOWLIM	2
+#define	IXGBE_JCLNUM_MULTI_DEFAULT	3
+#if !defined(IXGBE_JCLNUM_MULTI)
+# define IXGBE_JCLNUM_MULTI IXGBE_JCLNUM_MULTI_DEFAULT
+#else
+# if (IXGBE_JCLNUM_MULTI < IXGBE_JCLNUM_MULTI_LOWLIM)
+#  error IXGBE_JCLNUM_MULTI is too low.
+# endif
+#endif
+
 #define IFCAP_RXCSUM	\
 	(IFCAP_CSUM_IPv4_Rx|IFCAP_CSUM_TCPv4_Rx|IFCAP_CSUM_UDPv4_Rx|\
 	IFCAP_CSUM_TCPv6_Rx|IFCAP_CSUM_UDPv6_Rx)

Index: src/sys/dev/pci/ixgbe/ixv.c
diff -u src/sys/dev/pci/ixgbe/ixv.c:1.154 src/sys/dev/pci/ixgbe/ixv.c:1.155
--- src/sys/dev/pci/ixgbe/ixv.c:1.154	Mon Sep  7 05:50:58 2020
+++ src/sys/dev/pci/ixgbe/ixv.c	Tue Mar  9 10:03:18 2021
@@ -1,4 +1,4 @@
-/*$NetBSD: ixv.c,v 1.154 2020/09/07 05:50:58 msaitoh Exp $*/
+/*$NetBSD: ixv.c,v 1.155 2021/03/09 10:03:18 msaitoh Exp $*/
 
 /******************************************************************************
 
@@ -38,6 +38,7 @@
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_net_mpsafe.h"
+#include "opt_ixgbe.h"
 #endif
 
 #include "ixgbe.h"
@@ -512,6 +513,8 @@ ixv_attach(device_t parent, device_t dev
 	} else
 		adapter->num_rx_desc = ixv_rxd;
 
+	adapter->num_jcl = adapter->num_rx_desc * IXGBE_JCLNUM_MULTI;
+
 	/* Setup MSI-X */
 	error = ixv_configure_interrupts(adapter);
 	if (error)
@@ -2558,6 +2561,13 @@ ixv_add_device_sysctls(struct adapter *a
 		aprint_error_dev(dev, "could not create sysctl\n");
 
 	if (sysctl_createv(log, 0, &rnode, &cnode,
+	    CTLFLAG_READONLY, CTLTYPE_INT, "num_jcl_per_queue",
+	    SYSCTL_DESCR("Number of jumbo buffers per queue"),
+	    NULL, 0, &adapter->num_jcl, 0, CTL_CREATE,
+	    CTL_EOL) != 0)
+		aprint_error_dev(dev, "could not create sysctl\n");
+
+	if (sysctl_createv(log, 0, &rnode, &cnode,
 	    CTLFLAG_READWRITE, CTLTYPE_BOOL,
 	    "enable_aim", SYSCTL_DESCR("Interrupt Moderation"),
 	    NULL, 0, &adapter->enable_aim, 0, CTL_CREATE, CTL_EOL) != 0)

Reply via email to