Right now, the pools that make up mbufs are each limited individually. The following diff instead gives the mbuf layer a global limit on the amount of memory that can be allocated to these pools. This is enforced by wrapping the multi-page pool allocator with something that checks the mbuf memory limit first.
this means all mbufs will use a max of 2k * nmbclust bytes instead of each pool being able to use that amount each. ok? Index: sys/pool.h =================================================================== RCS file: /cvs/src/sys/sys/pool.h,v retrieving revision 1.68 diff -u -p -r1.68 pool.h --- sys/pool.h 21 Nov 2016 01:44:06 -0000 1.68 +++ sys/pool.h 22 Nov 2016 02:31:47 -0000 @@ -205,6 +205,7 @@ struct pool { #ifdef _KERNEL extern struct pool_allocator pool_allocator_single; +extern struct pool_allocator pool_allocator_multi; struct pool_request { TAILQ_ENTRY(pool_request) pr_entry; Index: sys/mbuf.h =================================================================== RCS file: /cvs/src/sys/sys/mbuf.h,v retrieving revision 1.222 diff -u -p -r1.222 mbuf.h --- sys/mbuf.h 24 Oct 2016 04:38:44 -0000 1.222 +++ sys/mbuf.h 22 Nov 2016 02:31:47 -0000 @@ -416,6 +416,7 @@ struct mbuf_queue { }; #ifdef _KERNEL +struct pool; extern int nmbclust; /* limit on the # of clusters */ extern int mblowat; /* mbuf low water mark */ @@ -444,6 +445,7 @@ int m_leadingspace(struct mbuf *); int m_trailingspace(struct mbuf *); struct mbuf *m_clget(struct mbuf *, int, u_int); void m_extref(struct mbuf *, struct mbuf *); +void m_pool_init(struct pool *, u_int, u_int, const char *); void m_extfree_pool(caddr_t, u_int, void *); void m_adj(struct mbuf *, int); int m_copyback(struct mbuf *, int, int, const void *, int); Index: kern/uipc_mbuf.c =================================================================== RCS file: /cvs/src/sys/kern/uipc_mbuf.c,v retrieving revision 1.238 diff -u -p -r1.238 uipc_mbuf.c --- kern/uipc_mbuf.c 9 Nov 2016 08:55:11 -0000 1.238 +++ kern/uipc_mbuf.c 22 Nov 2016 02:31:47 -0000 @@ -133,6 +133,19 @@ void m_extfree(struct mbuf *); void nmbclust_update(void); void m_zero(struct mbuf *); +struct mutex m_pool_mtx = MUTEX_INITIALIZER(IPL_NET); +unsigned int mbuf_mem_limit; /* how much memory can we allocated */ +unsigned int mbuf_mem_alloc; /* how much memory has been 
allocated */ + +void *m_pool_alloc(struct pool *, int, int *); +void m_pool_free(struct pool *, void *); + +struct pool_allocator m_pool_allocator = { + m_pool_alloc, + m_pool_free, + 0 /* will be copied from pool_allocator_multi */ +}; + static void (*mextfree_fns[4])(caddr_t, u_int, void *); static u_int num_extfree_fns; @@ -148,6 +161,11 @@ mbinit(void) int i; unsigned int lowbits; + m_pool_allocator.pa_pagesz = pool_allocator_multi.pa_pagesz; + + nmbclust_update(); + mbuf_mem_alloc = 0; + #if DIAGNOSTIC if (mclsizes[0] != MCLBYTES) panic("mbinit: the smallest cluster size != MCLBYTES"); @@ -155,9 +173,7 @@ mbinit(void) panic("mbinit: the largest cluster size != MAXMCLBYTES"); #endif - pool_init(&mbpool, MSIZE, 0, IPL_NET, 0, "mbufpl", NULL); - pool_set_constraints(&mbpool, &kp_dma_contig); - pool_setlowat(&mbpool, mblowat); + m_pool_init(&mbpool, MSIZE, 64, "mbufpl"); pool_init(&mtagpool, PACKET_TAG_MAXSIZE + sizeof(struct m_tag), 0, IPL_NET, 0, "mtagpl", NULL); @@ -171,47 +187,32 @@ mbinit(void) snprintf(mclnames[i], sizeof(mclnames[0]), "mcl%dk", mclsizes[i] >> 10); } - pool_init(&mclpools[i], mclsizes[i], 64, IPL_NET, 0, - mclnames[i], NULL); - pool_set_constraints(&mclpools[i], &kp_dma_contig); - pool_setlowat(&mclpools[i], mcllowat); + + m_pool_init(&mclpools[i], mclsizes[i], 64, mclnames[i]); } (void)mextfree_register(m_extfree_pool); KASSERT(num_extfree_fns == 1); - - nmbclust_update(); } void mbcpuinit() { + int i; + mbstat = counters_alloc_ncpus(mbstat, MBSTAT_COUNT, M_DEVBUF); + + pool_cache_init(&mbpool); + pool_cache_init(&mtagpool); + + for (i = 0; i < nitems(mclsizes); i++) + pool_cache_init(&mclpools[i]); } void nmbclust_update(void) { - unsigned int i, n; - - /* - * Set the hard limit on the mclpools to the number of - * mbuf clusters the kernel is to support. Log the limit - * reached message max once a minute. 
- */ - for (i = 0; i < nitems(mclsizes); i++) { - n = (unsigned long long)nmbclust * MCLBYTES / mclsizes[i]; - (void)pool_sethardlimit(&mclpools[i], n, mclpool_warnmsg, 60); - /* - * XXX this needs to be reconsidered. - * Setting the high water mark to nmbclust is too high - * but we need to have enough spare buffers around so that - * allocations in interrupt context don't fail or mclgeti() - * drivers may end up with empty rings. - */ - pool_sethiwat(&mclpools[i], n); - } - pool_sethiwat(&mbpool, nmbclust); + mbuf_mem_limit = nmbclust * MCLBYTES; } /* @@ -1377,6 +1378,52 @@ m_dup_pkt(struct mbuf *m0, unsigned int fail: m_freem(m); return (NULL); +} + +void * +m_pool_alloc(struct pool *pp, int flags, int *slowdown) +{ + void *v = NULL; + int avail = 1; + + if (mbuf_mem_alloc + pp->pr_pgsize > mbuf_mem_limit) + return (NULL); + + mtx_enter(&m_pool_mtx); + if (mbuf_mem_alloc + pp->pr_pgsize > mbuf_mem_limit) + avail = 0; + else + mbuf_mem_alloc += pp->pr_pgsize; + mtx_leave(&m_pool_mtx); + + if (avail) { + v = (*pool_allocator_multi.pa_alloc)(pp, flags, slowdown); + + if (v == NULL) { + mtx_enter(&m_pool_mtx); + mbuf_mem_alloc -= pp->pr_pgsize; + mtx_leave(&m_pool_mtx); + } + } + + return (v); +} + +void +m_pool_free(struct pool *pp, void *v) +{ + (*pool_allocator_multi.pa_free)(pp, v); + + mtx_enter(&m_pool_mtx); + mbuf_mem_alloc -= pp->pr_pgsize; + mtx_leave(&m_pool_mtx); +} + +void +m_pool_init(struct pool *pp, u_int size, u_int align, const char *wmesg) +{ + pool_init(pp, size, align, IPL_NET, 0, wmesg, &m_pool_allocator); + pool_set_constraints(pp, &kp_dma_contig); } #ifdef DDB Index: dev/pci/if_myx.c =================================================================== RCS file: /cvs/src/sys/dev/pci/if_myx.c,v retrieving revision 1.99 diff -u -p -r1.99 if_myx.c --- dev/pci/if_myx.c 31 Oct 2016 01:38:57 -0000 1.99 +++ dev/pci/if_myx.c 22 Nov 2016 02:31:47 -0000 @@ -294,8 +294,6 @@ myx_attach(struct device *parent, struct /* this is sort of racy */ if 
(myx_mcl_pool == NULL) { - extern struct kmem_pa_mode kp_dma_contig; - myx_mcl_pool = malloc(sizeof(*myx_mcl_pool), M_DEVBUF, M_WAITOK); if (myx_mcl_pool == NULL) { @@ -303,9 +301,9 @@ myx_attach(struct device *parent, struct DEVNAME(sc)); goto unmap; } - pool_init(myx_mcl_pool, MYX_RXBIG_SIZE, MYX_BOUNDARY, IPL_NET, - 0, "myxmcl", NULL); - pool_set_constraints(myx_mcl_pool, &kp_dma_contig); + + m_pool_init(myx_mcl_pool, MYX_RXBIG_SIZE, MYX_BOUNDARY, + "myxmcl"); } if (myx_pcie_dc(sc, pa) != 0)