Hi, To move our network performance to modern high-bandwidth and high-latency characteristics, we have to increase the socket buffer size limit. That also implies more mbuf clusters to avoid running out of them.
This diff includes several aspects that somehow belong together. - Increase the number of mbufs on most architectures. I have to guess how much memory a typical machine has. If the value is too high, we may run out of kernel memory. arch/alpha/include/param.h:#define NMBCLUSTERS (16 * 1026) arch/amd64/include/param.h:#define NMBCLUSTERS (256 * 1024) arch/arm/include/param.h:#define NMBCLUSTERS (16 * 1024) arch/hppa/include/param.h:#define NMBCLUSTERS (4 * 1024) arch/i386/include/param.h:#define NMBCLUSTERS (32 * 1024) arch/m88k/include/param.h:#define NMBCLUSTERS (4 * 1024) arch/mips64/include/param.h:#define NMBCLUSTERS (8 * 1024) arch/powerpc/include/param.h:#define NMBCLUSTERS (4 * 1024) arch/sh/include/param.h:#define NMBCLUSTERS (4 * 1024) arch/sparc64/include/param.h:#define NMBCLUSTERS (16 * 1024) - Set size of mclsizes array explicitly to keep it in sync with mclpools. - Limit all mbuf cluster pools to the same memory size. Limiting them by number would allow the large clusters to use too much memory. - If sosend() cannot allocate a large cluster, try a small one as fallback. - Reduce the factor of the limits derived from NMBCLUSTERS. We want the additional clusters in the socket buffer and not elsewhere. - Increase the socket buffer size limit from 256 KB to 2 MB. If the value is too high, we may run out of mbufs. Feel free to comment and OK the parts separately. 
bluhm Index: arch/alpha/include/param.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/arch/alpha/include/param.h,v retrieving revision 1.39 diff -u -p -r1.39 param.h --- arch/alpha/include/param.h 26 Mar 2013 05:04:08 -0000 1.39 +++ arch/alpha/include/param.h 2 Sep 2016 15:35:35 -0000 @@ -63,7 +63,7 @@ #define USPACE (UPAGES * PAGE_SIZE) /* total size of u-area */ #define USPACE_ALIGN 0 /* u-area alignment 0-none */ -#define NMBCLUSTERS 4096 /* map size, max cluster allocation */ +#define NMBCLUSTERS (16 * 1026) /* max cluster allocation */ #ifndef MSGBUFSIZE #define MSGBUFSIZE (2 * PAGE_SIZE) /* default message buffer size */ Index: arch/amd64/include/param.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/arch/amd64/include/param.h,v retrieving revision 1.22 diff -u -p -r1.22 param.h --- arch/amd64/include/param.h 26 Mar 2013 05:04:10 -0000 1.22 +++ arch/amd64/include/param.h 2 Sep 2016 15:58:26 -0000 @@ -72,7 +72,7 @@ #define USPACE (UPAGES * PAGE_SIZE) /* total size of u-area */ #define USPACE_ALIGN 0 /* u-area alignment 0-none */ -#define NMBCLUSTERS 6144 /* map size, max cluster allocation */ +#define NMBCLUSTERS (256 * 1024) /* max cluster allocation */ #ifndef MSGBUFSIZE #define MSGBUFSIZE (16 * PAGE_SIZE) /* default message buffer size */ Index: arch/arm/include/param.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/arch/arm/include/param.h,v retrieving revision 1.22 diff -u -p -r1.22 param.h --- arch/arm/include/param.h 20 Aug 2016 19:31:01 -0000 1.22 +++ arch/arm/include/param.h 2 Sep 2016 16:00:12 -0000 @@ -55,7 +55,7 @@ #define USPACE (UPAGES * PAGE_SIZE) /* total size of u-area */ #define USPACE_ALIGN 0 /* u-area alignment 0-none */ -#define NMBCLUSTERS 4096 /* map size, max cluster allocation */ +#define NMBCLUSTERS (16 * 1024) /* max cluster allocation */ /* * 
Maximum size of the kernel malloc arena in PAGE_SIZE-sized Index: arch/hppa/include/param.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/arch/hppa/include/param.h,v retrieving revision 1.45 diff -u -p -r1.45 param.h --- arch/hppa/include/param.h 26 Mar 2013 05:04:10 -0000 1.45 +++ arch/hppa/include/param.h 2 Sep 2016 15:39:12 -0000 @@ -51,7 +51,7 @@ #define USPACE (UPAGES * PAGE_SIZE) /* total size of u-area */ #define USPACE_ALIGN 0 /* u-area alignment 0-none */ -#define NMBCLUSTERS 4096 /* map size, max cluster allocation */ +#define NMBCLUSTERS (4 * 1024) /* max cluster allocation */ #ifndef MSGBUFSIZE #define MSGBUFSIZE (2 * PAGE_SIZE) /* default message buffer size */ Index: arch/i386/include/param.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/arch/i386/include/param.h,v retrieving revision 1.47 diff -u -p -r1.47 param.h --- arch/i386/include/param.h 26 Mar 2013 05:04:10 -0000 1.47 +++ arch/i386/include/param.h 2 Sep 2016 15:39:43 -0000 @@ -67,7 +67,7 @@ #define USPACE (UPAGES * PAGE_SIZE) /* total size of u-area */ #define USPACE_ALIGN 0 /* u-area alignment 0-none */ -#define NMBCLUSTERS 6144 /* map size, max cluster allocation */ +#define NMBCLUSTERS (32 * 1024) /* max cluster allocation */ #ifndef MSGBUFSIZE #define MSGBUFSIZE (4 * PAGE_SIZE) /* default message buffer size */ Index: arch/m88k/include/param.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/arch/m88k/include/param.h,v retrieving revision 1.19 diff -u -p -r1.19 param.h --- arch/m88k/include/param.h 1 Apr 2013 12:52:24 -0000 1.19 +++ arch/m88k/include/param.h 2 Sep 2016 15:45:03 -0000 @@ -64,7 +64,7 @@ #define USPACE (UPAGES * PAGE_SIZE) /* total size of u-area */ #define USPACE_ALIGN 0 /* u-area alignment 0-none */ -#define NMBCLUSTERS 4096 /* map size, max cluster allocation */ +#define NMBCLUSTERS 
(4 * 1024) /* max cluster allocation */ #ifndef MSGBUFSIZE #define MSGBUFSIZE PAGE_SIZE Index: arch/mips64/include/param.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/arch/mips64/include/param.h,v retrieving revision 1.34 diff -u -p -r1.34 param.h --- arch/mips64/include/param.h 2 Nov 2015 20:13:45 -0000 1.34 +++ arch/mips64/include/param.h 2 Sep 2016 16:00:29 -0000 @@ -62,7 +62,7 @@ #define USPACE_ALIGN (2 * PAGE_SIZE) /* align to an even TLB boundary */ #endif -#define NMBCLUSTERS 4096 /* map size, max cluster allocation */ +#define NMBCLUSTERS (8 * 1024) /* max cluster allocation */ #ifndef MSGBUFSIZE #if PAGE_SHIFT > 12 Index: arch/powerpc/include/param.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/arch/powerpc/include/param.h,v retrieving revision 1.36 diff -u -p -r1.36 param.h --- arch/powerpc/include/param.h 13 Jun 2013 11:29:20 -0000 1.36 +++ arch/powerpc/include/param.h 2 Sep 2016 15:46:12 -0000 @@ -58,7 +58,7 @@ #define USPACE (UPAGES * PAGE_SIZE) /* total size of u-area */ #define USPACE_ALIGN 0 /* u-area alignment 0-none */ -#define NMBCLUSTERS 4096 /* map size, max cluster allocation */ +#define NMBCLUSTERS (4 * 1024) /* max cluster allocation */ /* * Maximum size of the kernel malloc arena in PAGE_SIZE-sized Index: arch/sh/include/param.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/arch/sh/include/param.h,v retrieving revision 1.11 diff -u -p -r1.11 param.h --- arch/sh/include/param.h 26 Mar 2013 05:04:10 -0000 1.11 +++ arch/sh/include/param.h 2 Sep 2016 15:59:29 -0000 @@ -80,7 +80,7 @@ #ifdef _KERNEL -#define NMBCLUSTERS 4096 /* map size, max cluster allocation */ +#define NMBCLUSTERS (4 * 1024) /* max cluster allocation */ #ifndef MSGBUFSIZE #define MSGBUFSIZE PAGE_SIZE /* default message buffer size */ Index: arch/sparc64/include/param.h 
=================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/arch/sparc64/include/param.h,v retrieving revision 1.39 diff -u -p -r1.39 param.h --- arch/sparc64/include/param.h 26 Mar 2013 05:04:10 -0000 1.39 +++ arch/sparc64/include/param.h 2 Sep 2016 15:48:11 -0000 @@ -135,7 +135,7 @@ #define USPACE (UPAGES * PAGE_SIZE) /* total size of u-area */ #define USPACE_ALIGN 0 /* u-area alignment 0-none */ -#define NMBCLUSTERS 4096 /* map size, max cluster allocation */ +#define NMBCLUSTERS (16 * 1024) /* max cluster allocation */ #ifndef MSGBUFSIZE #define MSGBUFSIZE (1 * PAGE_SIZE) Index: kern/uipc_mbuf.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/kern/uipc_mbuf.c,v retrieving revision 1.226 diff -u -p -r1.226 uipc_mbuf.c --- kern/uipc_mbuf.c 13 Jun 2016 21:24:43 -0000 1.226 +++ kern/uipc_mbuf.c 2 Sep 2016 15:10:21 -0000 @@ -105,7 +105,7 @@ struct pool mbpool; /* mbuf pool */ struct pool mtagpool; /* mbuf cluster pools */ -u_int mclsizes[] = { +u_int mclsizes[MCLPOOLS] = { MCLBYTES, /* must be at slot 0 */ 4 * 1024, 8 * 1024, @@ -179,15 +179,16 @@ mbinit(void) void nmbclust_update(void) { - int i; + unsigned int i, n; + /* * Set the hard limit on the mclpools to the number of * mbuf clusters the kernel is to support. Log the limit * reached message max once a minute. */ for (i = 0; i < nitems(mclsizes); i++) { - (void)pool_sethardlimit(&mclpools[i], nmbclust, - mclpool_warnmsg, 60); + n = (unsigned long long)nmbclust * MCLBYTES / mclsizes[i]; + (void)pool_sethardlimit(&mclpools[i], n, mclpool_warnmsg, 60); /* * XXX this needs to be reconsidered. * Setting the high water mark to nmbclust is too high @@ -195,7 +196,7 @@ nmbclust_update(void) * allocations in interrupt context don't fail or mclgeti() * drivers may end up with empty rings. 
*/ - pool_sethiwat(&mclpools[i], nmbclust); + pool_sethiwat(&mclpools[i], n); } pool_sethiwat(&mbpool, nmbclust); } Index: kern/uipc_socket.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/kern/uipc_socket.c,v retrieving revision 1.155 diff -u -p -r1.155 uipc_socket.c --- kern/uipc_socket.c 25 Aug 2016 14:13:19 -0000 1.155 +++ kern/uipc_socket.c 2 Sep 2016 15:12:38 -0000 @@ -546,6 +546,8 @@ m_getuio(struct mbuf **mp, int atomic, l if (resid >= MINCLSIZE) { MCLGETI(m, M_NOWAIT, NULL, ulmin(resid, MAXMCLBYTES)); if ((m->m_flags & M_EXT) == 0) + MCLGETI(m, M_NOWAIT, NULL, MCLBYTES); + if ((m->m_flags & M_EXT) == 0) goto nopages; mlen = m->m_ext.ext_size; len = ulmin(mlen, resid); Index: net/pfvar.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/net/pfvar.h,v retrieving revision 1.436 diff -u -p -r1.436 pfvar.h --- net/pfvar.h 20 Aug 2016 08:34:30 -0000 1.436 +++ net/pfvar.h 2 Sep 2016 16:03:30 -0000 @@ -1411,8 +1411,8 @@ struct pf_divert { }; /* Fragment entries reference mbuf clusters, so base the default on that. 
*/ -#define PFFRAG_FRENT_HIWAT (NMBCLUSTERS / 4) /* Number of entries */ -#define PFFRAG_FRAG_HIWAT (NMBCLUSTERS / 8) /* Number of packets */ +#define PFFRAG_FRENT_HIWAT (NMBCLUSTERS / 16) /* Number of entries */ +#define PFFRAG_FRAG_HIWAT (NMBCLUSTERS / 32) /* Number of packets */ #define PFR_KTABLE_HIWAT 1000 /* Number of tables */ #define PFR_KENTRY_HIWAT 200000 /* Number of table entries */ Index: netinet/tcp_subr.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_subr.c,v retrieving revision 1.152 diff -u -p -r1.152 tcp_subr.c --- netinet/tcp_subr.c 31 Aug 2016 11:05:05 -0000 1.152 +++ netinet/tcp_subr.c 2 Sep 2016 16:03:24 -0000 @@ -120,7 +120,7 @@ u_int32_t tcp_now = 1; #define TCB_INITIAL_HASH_SIZE 128 #endif -int tcp_reass_limit = NMBCLUSTERS / 2; /* hardlimit for tcpqe_pool */ +int tcp_reass_limit = NMBCLUSTERS / 8; /* hardlimit for tcpqe_pool */ #ifdef TCP_SACK int tcp_sackhole_limit = 32*1024; /* hardlimit for sackhl_pool */ #endif Index: sys/socketvar.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/sys/socketvar.h,v retrieving revision 1.62 diff -u -p -r1.62 socketvar.h --- sys/socketvar.h 25 Aug 2016 14:13:19 -0000 1.62 +++ sys/socketvar.h 2 Sep 2016 16:34:59 -0000 @@ -116,7 +116,7 @@ struct socket { short sb_flags; /* flags, see below */ u_short sb_timeo; /* timeout for read/write */ } so_rcv, so_snd; -#define SB_MAX (256*1024) /* default for max chars in sockbuf */ +#define SB_MAX (2*1024*1024) /* default for max chars in sockbuf */ #define SB_WAIT 0x04 /* someone is waiting for data/space */ #define SB_SEL 0x08 /* someone is selecting */ #define SB_ASYNC 0x10 /* ASYNC I/O, need signals */