Module Name: src
Committed By: msaitoh
Date: Tue Mar 9 10:03:18 UTC 2021
Modified Files:
src/share/man/man4: ixg.4 ixv.4
src/sys/dev/pci: files.pci
src/sys/dev/pci/ixgbe: ix_txrx.c ixgbe.c ixgbe.h ixgbe_netbsd.h ixv.c
Log Message:
Modify some parameters to reduce packet dropping.
- Background: ixgbe doesn't use the common MCLGET() interface; it uses a
driver-specific cluster allocation mechanism (jcl). The clusters are
pre-allocated in a fixed number, and the current number per queue
is num_rx_desc * 2 (2048*2=4096), which is too small. Another problem
is that the max length of the pcq used in the TX path is large
(4096). Example:
100M <----- [ixg0 ixg1] <----- 1G
2048 TX descs <--- 4096 pcqs <---- 2048 RX descs
If a machine forwards traffic from the 1G interface to the 100M interface,
it could require 2048+4096+2048=8192 clusters, but the current number
is 2048*2=4096, which is too small. Even if both interfaces' link speeds
are the same and only a small number of packets is queued in the pcq, 4096
jcl is too few because 2048(RX)+2048(TX)=4096. If the jcl is exhausted, not
only is forwarding from ixg1 to ixg0 dropped, but forwarding from ixg1 to
any other interface (e.g. wm0) is dropped as well. Sockets also
queue packets, so using a lot of sockets and/or enlarging the socket buffer
size can exhaust the jcl, too. Whenever the jcl
is exhausted, the evcnt(9) counter "ixgX qY Rx no jumbo mbuf" is incremented.
Example:
vmstat -ev | grep ixg1 | grep "no jumbo"
ixg1 q0 Rx no jumbo mbuf 0 0 misc
ixg1 q1 Rx no jumbo mbuf 0 0 misc
ixg1 q2 Rx no jumbo mbuf 141326 0 misc
ixg1 q3 Rx no jumbo mbuf 0 0 misc
- To solve this problem:
- Add a new config parameter IXGBE_JCLNUM_MULTI and set the default to 3
(2048 * 3 = 6144 jcl per queue with 2048 RX descriptors). The minimum
value is 2. The total number of jcl per queue is available via the
hw.ixgN.num_jcl_per_queue sysctl.
- Reduce the max length of the pcq used in the TX path (IXGBE_BR_SIZE) from
4096 to 2048.
- Reviewed by knakahara@ and ozaki-r@.
- TODO: Use MCLGET().
To generate a diff of this commit:
cvs rdiff -u -r1.12 -r1.13 src/share/man/man4/ixg.4
cvs rdiff -u -r1.5 -r1.6 src/share/man/man4/ixv.4
cvs rdiff -u -r1.435 -r1.436 src/sys/dev/pci/files.pci
cvs rdiff -u -r1.66 -r1.67 src/sys/dev/pci/ixgbe/ix_txrx.c
cvs rdiff -u -r1.278 -r1.279 src/sys/dev/pci/ixgbe/ixgbe.c
cvs rdiff -u -r1.74 -r1.75 src/sys/dev/pci/ixgbe/ixgbe.h
cvs rdiff -u -r1.11 -r1.12 src/sys/dev/pci/ixgbe/ixgbe_netbsd.h
cvs rdiff -u -r1.154 -r1.155 src/sys/dev/pci/ixgbe/ixv.c
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/share/man/man4/ixg.4
diff -u src/share/man/man4/ixg.4:1.12 src/share/man/man4/ixg.4:1.13
--- src/share/man/man4/ixg.4:1.12 Wed May 9 08:01:16 2018
+++ src/share/man/man4/ixg.4 Tue Mar 9 10:03:18 2021
@@ -1,4 +1,4 @@
-.\" $NetBSD: ixg.4,v 1.12 2018/05/09 08:01:16 wiz Exp $
+.\" $NetBSD: ixg.4,v 1.13 2021/03/09 10:03:18 msaitoh Exp $
.\"
.\" Copyright (c) 2001-2008, Intel Corporation
.\" All rights reserved.
@@ -33,7 +33,7 @@
.\"
.\" $FreeBSD: src/share/man/man4/ixgbe.4,v 1.3 2010/12/19 23:54:31 yongari Exp $
.\"
-.Dd May 9, 2018
+.Dd March 9, 2021
.Dt IXG 4
.Os
.Sh NAME
@@ -83,6 +83,24 @@ go to the Intel support website at:
.\" with a supported adapter, email the specific information related to the
.\" issue to
.\" .Aq [email protected] .
+.Sh OPTIONS
+The
+.Nm
+driver doesn't use the common
+.Xr MCLGET 9
+interface and use the driver specific cluster allocation mechanism.
+If it's exhausted,
+.Xr evcnt 9
+counter "ixgX qY Rx no jumbo mbuf" is incremented. If it's observed,
+The number can be changed by the following config parameter:
+.Bl -tag -width IXGBE_JCLNUM_MULTI -offset 3n
+.It Dv IXGBE_JCLNUM_MULTI
+The number of RX jumbo buffer (cluster) per queue is calculated by
+.Dv IXGBE_JCLNUM_MULTI
+* (number of rx descriptor).
+The total number of clusters per queue is available with
+hw.ixgN.num_jcl_per_queue sysctl.
+.El
.Sh SEE ALSO
.Xr arp 4 ,
.Xr ixv 4 ,
Index: src/share/man/man4/ixv.4
diff -u src/share/man/man4/ixv.4:1.5 src/share/man/man4/ixv.4:1.6
--- src/share/man/man4/ixv.4:1.5 Thu Sep 5 10:01:30 2019
+++ src/share/man/man4/ixv.4 Tue Mar 9 10:03:18 2021
@@ -1,4 +1,4 @@
-.\" $NetBSD: ixv.4,v 1.5 2019/09/05 10:01:30 msaitoh Exp $
+.\" $NetBSD: ixv.4,v 1.6 2021/03/09 10:03:18 msaitoh Exp $
.\"
.\" Copyright (c) 2018 The NetBSD Foundation, Inc.
.\" All rights reserved.
@@ -27,7 +27,7 @@
.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
.\" POSSIBILITY OF SUCH DAMAGE.
.\"
-.Dd September 5, 2019
+.Dd March 9, 2021
.Dt IXV 4
.Os
.Sh NAME
@@ -43,6 +43,24 @@ newer chips support.
It can be used on a
.Nx
guest that the host supports SR-IOV.
+.Sh OPTIONS
+The
+.Nm
+driver doesn't use the common
+.Xr MCLGET 9
+interface and use the driver specific cluster allocation mechanism.
+If it's exhausted,
+.Xr evcnt 9
+counter "ixgX qY Rx no jumbo mbuf" is incremented. If it's observed,
+The number can be changed by the following config parameter:
+.Bl -tag -width IXGBE_JCLNUM_MULTI -offset 3n
+.It Dv IXGBE_JCLNUM_MULTI
+The number of RX jumbo buffer (cluster) per queue is calculated by
+.Dv IXGBE_JCLNUM_MULTI
+* (number of rx descriptor).
+The total number of clusters per queue is available with
+hw.ixgN.num_jcl_per_queue sysctl.
+.El
.Sh SEE ALSO
.Xr arp 4 ,
.Xr ixg 4 ,
Index: src/sys/dev/pci/files.pci
diff -u src/sys/dev/pci/files.pci:1.435 src/sys/dev/pci/files.pci:1.436
--- src/sys/dev/pci/files.pci:1.435 Mon Mar 1 17:48:52 2021
+++ src/sys/dev/pci/files.pci Tue Mar 9 10:03:18 2021
@@ -1,4 +1,4 @@
-# $NetBSD: files.pci,v 1.435 2021/03/01 17:48:52 jakllsch Exp $
+# $NetBSD: files.pci,v 1.436 2021/03/09 10:03:18 msaitoh Exp $
#
# Config file and device description for machine-independent PCI code.
# Included by ports that need it. Requires that the SCSI files be
@@ -664,6 +664,7 @@ file dev/pci/ixgbe/ixgbe_phy.c ixg | ixv
file dev/pci/ixgbe/ixgbe_vf.c ixg | ixv
file dev/pci/ixgbe/if_bypass.c ixg | ixv
file dev/pci/ixgbe/if_fdir.c ixg | ixv
+defparam opt_ixgbe.h IXGBE_JCLNUM_MULTI
# This appears to be the driver for virtual instances of i82599.
device ixv: ether, ifnet, arp, mii, mii_phy
Index: src/sys/dev/pci/ixgbe/ix_txrx.c
diff -u src/sys/dev/pci/ixgbe/ix_txrx.c:1.66 src/sys/dev/pci/ixgbe/ix_txrx.c:1.67
--- src/sys/dev/pci/ixgbe/ix_txrx.c:1.66 Mon Mar 8 07:10:45 2021
+++ src/sys/dev/pci/ixgbe/ix_txrx.c Tue Mar 9 10:03:18 2021
@@ -1,4 +1,4 @@
-/* $NetBSD: ix_txrx.c,v 1.66 2021/03/08 07:10:45 msaitoh Exp $ */
+/* $NetBSD: ix_txrx.c,v 1.67 2021/03/09 10:03:18 msaitoh Exp $ */
/******************************************************************************
@@ -1507,7 +1507,7 @@ ixgbe_setup_receive_ring(struct rx_ring
* Assume all of rxr->ptag are the same.
*/
ixgbe_jcl_reinit(adapter, rxr->ptag->dt_dmat, rxr,
- (2 * adapter->num_rx_desc), adapter->rx_mbuf_sz);
+ adapter->num_jcl, adapter->rx_mbuf_sz);
IXGBE_RX_LOCK(rxr);
Index: src/sys/dev/pci/ixgbe/ixgbe.c
diff -u src/sys/dev/pci/ixgbe/ixgbe.c:1.278 src/sys/dev/pci/ixgbe/ixgbe.c:1.279
--- src/sys/dev/pci/ixgbe/ixgbe.c:1.278 Thu Jan 14 05:47:35 2021
+++ src/sys/dev/pci/ixgbe/ixgbe.c Tue Mar 9 10:03:18 2021
@@ -1,4 +1,4 @@
-/* $NetBSD: ixgbe.c,v 1.278 2021/01/14 05:47:35 msaitoh Exp $ */
+/* $NetBSD: ixgbe.c,v 1.279 2021/03/09 10:03:18 msaitoh Exp $ */
/******************************************************************************
@@ -67,6 +67,7 @@
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_net_mpsafe.h"
+#include "opt_ixgbe.h"
#endif
#include "ixgbe.h"
@@ -982,6 +983,8 @@ ixgbe_attach(device_t parent, device_t d
} else
adapter->num_rx_desc = ixgbe_rxd;
+ adapter->num_jcl = adapter->num_rx_desc * IXGBE_JCLNUM_MULTI;
+
/* Allocate our TX/RX Queues */
if (ixgbe_allocate_queues(adapter)) {
error = ENOMEM;
@@ -3366,6 +3369,13 @@ ixgbe_add_device_sysctls(struct adapter
aprint_error_dev(dev, "could not create sysctl\n");
if (sysctl_createv(log, 0, &rnode, &cnode,
+ CTLFLAG_READONLY, CTLTYPE_INT, "num_jcl_per_queue",
+ SYSCTL_DESCR("Number of jumbo buffers per queue"),
+ NULL, 0, &adapter->num_jcl, 0, CTL_CREATE,
+ CTL_EOL) != 0)
+ aprint_error_dev(dev, "could not create sysctl\n");
+
+ if (sysctl_createv(log, 0, &rnode, &cnode,
CTLFLAG_READONLY, CTLTYPE_INT,
"num_queues", SYSCTL_DESCR("Number of queues"),
NULL, 0, &adapter->num_queues, 0, CTL_CREATE, CTL_EOL) != 0)
Index: src/sys/dev/pci/ixgbe/ixgbe.h
diff -u src/sys/dev/pci/ixgbe/ixgbe.h:1.74 src/sys/dev/pci/ixgbe/ixgbe.h:1.75
--- src/sys/dev/pci/ixgbe/ixgbe.h:1.74 Sat Dec 26 06:07:16 2020
+++ src/sys/dev/pci/ixgbe/ixgbe.h Tue Mar 9 10:03:18 2021
@@ -1,4 +1,4 @@
-/* $NetBSD: ixgbe.h,v 1.74 2020/12/26 06:07:16 msaitoh Exp $ */
+/* $NetBSD: ixgbe.h,v 1.75 2021/03/09 10:03:18 msaitoh Exp $ */
/******************************************************************************
SPDX-License-Identifier: BSD-3-Clause
@@ -216,7 +216,7 @@
#define IXGBE_TSO_SIZE 262140
#define IXGBE_RX_HDR 128
#define IXGBE_VFTA_SIZE 128
-#define IXGBE_BR_SIZE 4096
+#define IXGBE_BR_SIZE 2048
#define IXGBE_QUEUE_MIN_FREE 32
#define IXGBE_MAX_TX_BUSY 10
#define IXGBE_QUEUE_HUNG 0x80000000
@@ -568,6 +568,7 @@ struct adapter {
u64 active_queues;
u32 num_rx_desc;
u32 rx_process_limit;
+ int num_jcl;
/* Multicast array memory */
struct ixgbe_mc_addr *mta;
Index: src/sys/dev/pci/ixgbe/ixgbe_netbsd.h
diff -u src/sys/dev/pci/ixgbe/ixgbe_netbsd.h:1.11 src/sys/dev/pci/ixgbe/ixgbe_netbsd.h:1.12
--- src/sys/dev/pci/ixgbe/ixgbe_netbsd.h:1.11 Tue Mar 5 08:25:02 2019
+++ src/sys/dev/pci/ixgbe/ixgbe_netbsd.h Tue Mar 9 10:03:18 2021
@@ -1,4 +1,4 @@
-/*$NetBSD: ixgbe_netbsd.h,v 1.11 2019/03/05 08:25:02 msaitoh Exp $*/
+/*$NetBSD: ixgbe_netbsd.h,v 1.12 2021/03/09 10:03:18 msaitoh Exp $*/
/*
* Copyright (c) 2011 The NetBSD Foundation, Inc.
* All rights reserved.
@@ -40,6 +40,20 @@
#define MJUM16BYTES (16 * 1024)
#define MJUMPAGESIZE PAGE_SIZE
+/*
+ * Number of jcl per queue is calculated by
+ * adapter->num_rx_desc * IXGBE_JCLNUM_MULTI. The lower limit is 2.
+ */
+#define IXGBE_JCLNUM_MULTI_LOWLIM 2
+#define IXGBE_JCLNUM_MULTI_DEFAULT 3
+#if !defined(IXGBE_JCLNUM_MULTI)
+# define IXGBE_JCLNUM_MULTI IXGBE_JCLNUM_MULTI_DEFAULT
+#else
+# if (IXGBE_JCLNUM_MULTI < IXGBE_JCLNUM_MULTI_LOWLIM)
+# error IXGBE_JCLNUM_MULTI is too low.
+# endif
+#endif
+
#define IFCAP_RXCSUM \
(IFCAP_CSUM_IPv4_Rx|IFCAP_CSUM_TCPv4_Rx|IFCAP_CSUM_UDPv4_Rx|\
IFCAP_CSUM_TCPv6_Rx|IFCAP_CSUM_UDPv6_Rx)
Index: src/sys/dev/pci/ixgbe/ixv.c
diff -u src/sys/dev/pci/ixgbe/ixv.c:1.154 src/sys/dev/pci/ixgbe/ixv.c:1.155
--- src/sys/dev/pci/ixgbe/ixv.c:1.154 Mon Sep 7 05:50:58 2020
+++ src/sys/dev/pci/ixgbe/ixv.c Tue Mar 9 10:03:18 2021
@@ -1,4 +1,4 @@
-/*$NetBSD: ixv.c,v 1.154 2020/09/07 05:50:58 msaitoh Exp $*/
+/*$NetBSD: ixv.c,v 1.155 2021/03/09 10:03:18 msaitoh Exp $*/
/******************************************************************************
@@ -38,6 +38,7 @@
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_net_mpsafe.h"
+#include "opt_ixgbe.h"
#endif
#include "ixgbe.h"
@@ -512,6 +513,8 @@ ixv_attach(device_t parent, device_t dev
} else
adapter->num_rx_desc = ixv_rxd;
+ adapter->num_jcl = adapter->num_rx_desc * IXGBE_JCLNUM_MULTI;
+
/* Setup MSI-X */
error = ixv_configure_interrupts(adapter);
if (error)
@@ -2558,6 +2561,13 @@ ixv_add_device_sysctls(struct adapter *a
aprint_error_dev(dev, "could not create sysctl\n");
if (sysctl_createv(log, 0, &rnode, &cnode,
+ CTLFLAG_READONLY, CTLTYPE_INT, "num_jcl_per_queue",
+ SYSCTL_DESCR("Number of jumbo buffers per queue"),
+ NULL, 0, &adapter->num_jcl, 0, CTL_CREATE,
+ CTL_EOL) != 0)
+ aprint_error_dev(dev, "could not create sysctl\n");
+
+ if (sysctl_createv(log, 0, &rnode, &cnode,
CTLFLAG_READWRITE, CTLTYPE_BOOL,
"enable_aim", SYSCTL_DESCR("Interrupt Moderation"),
NULL, 0, &adapter->enable_aim, 0, CTL_CREATE, CTL_EOL) != 0)