On 26.06.2017 16:29, Ben RUBSON wrote:
> 
>> On 26 Jun 2017, at 15:25, Andrey V. Elsukov <bu7c...@yandex.ru> wrote:
>>
>> On 26.06.2017 16:27, Ben RUBSON wrote:
>>>
>>>> On 26 Jun 2017, at 15:13, Andrey V. Elsukov <bu7c...@yandex.ru> wrote:
>>>>
>>>> I think it is not an mlxen-specific problem; we have the same symptoms
>>>> with the ixgbe(4) driver too. To avoid the problem we have patches that
>>>> disable the use of 9k mbufs and instead only use 4k mbufs.
>>>
>>> Interesting feedback, Andrey, thank you!
>>> The problem may then be "general".
>>> So you still use a large MTU (>=9000) but only allocate 4k mbufs, as a
>>> workaround?
>>
>> Yes.
> 
> Is it a kernel patch or a driver/ixgbe patch ?

I attached it.

-- 
WBR, Andrey V. Elsukov
From 57b4789b7f6699a581ded2f4e07c7b12737af1e7 Mon Sep 17 00:00:00 2001
From: "Andrey V. Elsukov" <bu7c...@yandex.ru>
Date: Thu, 6 Oct 2016 14:56:37 +0300
Subject: [PATCH 04/65] Add m_preferredsize() and use it in all intel's
 drivers.

---
 sys/dev/e1000/if_em.c     |  7 +------
 sys/dev/e1000/if_igb.c    |  7 +------
 sys/dev/ixgbe/if_ix.c     |  5 +----
 sys/dev/ixgbe/if_ixv.c    |  5 +----
 sys/dev/ixl/if_ixlv.c     |  5 +----
 sys/dev/ixl/ixl_pf_main.c |  5 +----
 sys/kern/kern_mbuf.c      | 35 +++++++++++++++++++++++++++++++++++
 sys/sys/mbuf.h            |  1 +
 8 files changed, 42 insertions(+), 28 deletions(-)

diff --git a/sys/dev/e1000/if_em.c b/sys/dev/e1000/if_em.c
index 7e2690eae08..1af66b7c519 100644
--- a/sys/dev/e1000/if_em.c
+++ b/sys/dev/e1000/if_em.c
@@ -1421,12 +1421,7 @@ em_init_locked(struct adapter *adapter)
 	** Figure out the desired mbuf
 	** pool for doing jumbos
 	*/
-	if (adapter->hw.mac.max_frame_size <= 2048)
-		adapter->rx_mbuf_sz = MCLBYTES;
-	else if (adapter->hw.mac.max_frame_size <= 4096)
-		adapter->rx_mbuf_sz = MJUMPAGESIZE;
-	else
-		adapter->rx_mbuf_sz = MJUM9BYTES;
+	adapter->rx_mbuf_sz = m_preferredsize(adapter->hw.mac.max_frame_size);
 
 	/* Prepare receive descriptors and buffers */
 	if (em_setup_receive_structures(adapter)) {
diff --git a/sys/dev/e1000/if_igb.c b/sys/dev/e1000/if_igb.c
index 8e018995029..bfaecae1f71 100644
--- a/sys/dev/e1000/if_igb.c
+++ b/sys/dev/e1000/if_igb.c
@@ -1325,12 +1325,7 @@ igb_init_locked(struct adapter *adapter)
 	** Figure out the desired mbuf pool
 	** for doing jumbo/packetsplit
 	*/
-	if (adapter->max_frame_size <= 2048)
-		adapter->rx_mbuf_sz = MCLBYTES;
-	else if (adapter->max_frame_size <= 4096)
-		adapter->rx_mbuf_sz = MJUMPAGESIZE;
-	else
-		adapter->rx_mbuf_sz = MJUM9BYTES;
+	adapter->rx_mbuf_sz = m_preferredsize(adapter->max_frame_size);
 
 	/* Prepare receive descriptors and buffers */
 	if (igb_setup_receive_structures(adapter)) {
diff --git a/sys/dev/ixgbe/if_ix.c b/sys/dev/ixgbe/if_ix.c
index cf2231dc8fc..26fce2704ba 100644
--- a/sys/dev/ixgbe/if_ix.c
+++ b/sys/dev/ixgbe/if_ix.c
@@ -1118,10 +1118,7 @@ ixgbe_init_locked(struct adapter *adapter)
 	ixgbe_set_multi(adapter);
 
 	/* Determine the correct mbuf pool, based on frame size */
-	if (adapter->max_frame_size <= MCLBYTES)
-		adapter->rx_mbuf_sz = MCLBYTES;
-	else
-		adapter->rx_mbuf_sz = MJUMPAGESIZE;
+	adapter->rx_mbuf_sz = m_preferredsize(adapter->max_frame_size);
 
 	/* Prepare receive descriptors and buffers */
 	if (ixgbe_setup_receive_structures(adapter)) {
diff --git a/sys/dev/ixgbe/if_ixv.c b/sys/dev/ixgbe/if_ixv.c
index 80fb1b34be3..5062affb779 100644
--- a/sys/dev/ixgbe/if_ixv.c
+++ b/sys/dev/ixgbe/if_ixv.c
@@ -698,10 +698,7 @@ ixv_init_locked(struct adapter *adapter)
 	** Determine the correct mbuf pool
 	** for doing jumbo/headersplit
 	*/
-	if (ifp->if_mtu > ETHERMTU)
-		adapter->rx_mbuf_sz = MJUMPAGESIZE;
-	else
-		adapter->rx_mbuf_sz = MCLBYTES;
+	adapter->rx_mbuf_sz = m_preferredsize(ifp->if_mtu);
 
 	/* Prepare receive descriptors and buffers */
 	if (ixgbe_setup_receive_structures(adapter)) {
diff --git a/sys/dev/ixl/if_ixlv.c b/sys/dev/ixl/if_ixlv.c
index c447c34689e..608d784bfee 100644
--- a/sys/dev/ixl/if_ixlv.c
+++ b/sys/dev/ixl/if_ixlv.c
@@ -904,10 +904,7 @@ ixlv_init_locked(struct ixlv_sc *sc)
 
 		ixl_init_tx_ring(que);
 
-		if (vsi->max_frame_size <= MCLBYTES)
-			rxr->mbuf_sz = MCLBYTES;
-		else
-			rxr->mbuf_sz = MJUMPAGESIZE;
+		rxr->mbuf_sz = m_preferredsize(vsi->max_frame_size);
 		ixl_init_rx_ring(que);
 	}
 
diff --git a/sys/dev/ixl/ixl_pf_main.c b/sys/dev/ixl/ixl_pf_main.c
index d8da4cfee10..8600b0f931e 100644
--- a/sys/dev/ixl/ixl_pf_main.c
+++ b/sys/dev/ixl/ixl_pf_main.c
@@ -2067,10 +2067,7 @@ ixl_initialize_vsi(struct ixl_vsi *vsi)
 		ixl_init_tx_ring(que);
 
 		/* Next setup the HMC RX Context  */
-		if (vsi->max_frame_size <= MCLBYTES)
-			rxr->mbuf_sz = MCLBYTES;
-		else
-			rxr->mbuf_sz = MJUMPAGESIZE;
+		rxr->mbuf_sz = m_preferredsize(vsi->max_frame_size);
 
 		u16 max_rxmax = rxr->mbuf_sz * hw->func_caps.rx_buf_chain_len;
 
diff --git a/sys/kern/kern_mbuf.c b/sys/kern/kern_mbuf.c
index 0d0c1c86b16..7c10cedb075 100644
--- a/sys/kern/kern_mbuf.c
+++ b/sys/kern/kern_mbuf.c
@@ -103,6 +103,10 @@ int nmbjumbop;			/* limits number of page size jumbo clusters */
 int nmbjumbo9;			/* limits number of 9k jumbo clusters */
 int nmbjumbo16;			/* limits number of 16k jumbo clusters */
 
+static int nojumbobuf;		/* Use MCLBYTES mbufs */
+static int nojumbo9buf;		/* Use either MCLBYTES or MJUMPAGESIZE */
+static int nojumbo16buf;	/* Use any mbuf size less than MJUM16BYTES */
+
 static quad_t maxmbufmem;	/* overall real memory limit for all mbufs */
 
 SYSCTL_QUAD(_kern_ipc, OID_AUTO, maxmbufmem, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &maxmbufmem, 0,
@@ -151,6 +155,16 @@ tunable_mbinit(void *dummy)
 	if (nmbufs < nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16)
 		nmbufs = lmax(maxmbufmem / MSIZE / 5,
 		    nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16);
+	/*
+	 * Defaults to disable 9/16-kbyte pages
+	 */
+	nojumbobuf = 0;
+	nojumbo9buf = 1;
+	nojumbo16buf = 1;
+
+	TUNABLE_INT_FETCH("kern.ipc.nojumbobuf", &nojumbobuf);
+	TUNABLE_INT_FETCH("kern.ipc.nojumbo9buf", &nojumbo9buf);
+	TUNABLE_INT_FETCH("kern.ipc.nojumbo16buf", &nojumbo16buf);
 }
 SYSINIT(tunable_mbinit, SI_SUB_KMEM, SI_ORDER_MIDDLE, tunable_mbinit, NULL);
 
@@ -261,6 +275,27 @@ SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbufs, CTLTYPE_INT|CTLFLAG_RW,
     "Maximum number of mbufs allowed");
 
 /*
+ * Choose the mbuf cluster size for the given mtu.  A nonzero
+ * kern.ipc.nojumbo{,9,16}buf tunable caps the size that is returned.
+ */
+int
+m_preferredsize(int mtu)
+{
+	int size;
+
+	if (mtu <= 2048 || nojumbobuf != 0)
+		size = MCLBYTES;
+	else if (mtu <= 4096 || nojumbo9buf != 0)
+		size = MJUMPAGESIZE;
+	else if (mtu <= 9216 || nojumbo16buf != 0)
+		size = MJUM9BYTES;
+	else
+		size = MJUM16BYTES;
+
+	return (size);
+}
+
+/*
  * Zones from which we allocate.
  */
 uma_zone_t	zone_mbuf;
diff --git a/sys/sys/mbuf.h b/sys/sys/mbuf.h
index fdd9931515d..b6a81b05e3b 100644
--- a/sys/sys/mbuf.h
+++ b/sys/sys/mbuf.h
@@ -606,6 +606,7 @@ u_int		 m_length(struct mbuf *, struct mbuf **);
 int		 m_mbuftouio(struct uio *, struct mbuf *, int);
 void		 m_move_pkthdr(struct mbuf *, struct mbuf *);
 int		 m_pkthdr_init(struct mbuf *, int);
+int		 m_preferredsize(int);
 struct mbuf	*m_prepend(struct mbuf *, int, int);
 void		 m_print(const struct mbuf *, int);
 struct mbuf	*m_pulldown(struct mbuf *, int, int, int *);
-- 
2.12.1

Attachment: signature.asc
Description: OpenPGP digital signature

Reply via email to