> Date: Tue, 22 Nov 2016 12:42:39 +1000 > From: David Gwynne <da...@gwynne.id.au> > > right now pools that make up mbufs are each limited individually. > > the following diff instead has the mbuf layer have a global limit > on the amount of memory that can be allocated to the pools. this > is enforced by wrapping the multi page pool allocator with something > that checks the mbuf memory limit first. > > this means all mbufs will use a max of 2k * nmbclust bytes instead > of each pool being able to use that amount each. > > ok?
Mostly makes sense to me. Not sure the complexity of copying the supported page sizes from the multi-page pool allocator is worth the additional complication. I'd probably just initialize it the same way using POOL_ALLOC_SIZES(PAGE_SIZE, 1UL<<31, POOL_ALLOC_ALIGNED). Wouldn't it make sense to use atomic operations to keep track of the amount of memory that was allocated? Long run I suppose we want to drop nmbclust and let users tune the total amount of memory available for clusters and set the initial amount to a percentage of physical memory? > Index: sys/pool.h > =================================================================== > RCS file: /cvs/src/sys/sys/pool.h,v > retrieving revision 1.68 > diff -u -p -r1.68 pool.h > --- sys/pool.h 21 Nov 2016 01:44:06 -0000 1.68 > +++ sys/pool.h 22 Nov 2016 02:31:47 -0000 > @@ -205,6 +205,7 @@ struct pool { > #ifdef _KERNEL > > extern struct pool_allocator pool_allocator_single; > +extern struct pool_allocator pool_allocator_multi; > > struct pool_request { > TAILQ_ENTRY(pool_request) pr_entry; > Index: sys/mbuf.h > =================================================================== > RCS file: /cvs/src/sys/sys/mbuf.h,v > retrieving revision 1.222 > diff -u -p -r1.222 mbuf.h > --- sys/mbuf.h 24 Oct 2016 04:38:44 -0000 1.222 > +++ sys/mbuf.h 22 Nov 2016 02:31:47 -0000 > @@ -416,6 +416,7 @@ struct mbuf_queue { > }; > > #ifdef _KERNEL > +struct pool; > > extern int nmbclust; /* limit on the # of clusters */ > extern int mblowat; /* mbuf low water mark */ > @@ -444,6 +445,7 @@ int m_leadingspace(struct mbuf *); > int m_trailingspace(struct mbuf *); > struct mbuf *m_clget(struct mbuf *, int, u_int); > void m_extref(struct mbuf *, struct mbuf *); > +void m_pool_init(struct pool *, u_int, u_int, const char *); > void m_extfree_pool(caddr_t, u_int, void *); > void m_adj(struct mbuf *, int); > int m_copyback(struct mbuf *, int, int, const void *, int); > Index: kern/uipc_mbuf.c > 
=================================================================== > RCS file: /cvs/src/sys/kern/uipc_mbuf.c,v > retrieving revision 1.238 > diff -u -p -r1.238 uipc_mbuf.c > --- kern/uipc_mbuf.c 9 Nov 2016 08:55:11 -0000 1.238 > +++ kern/uipc_mbuf.c 22 Nov 2016 02:31:47 -0000 > @@ -133,6 +133,19 @@ void m_extfree(struct mbuf *); > void nmbclust_update(void); > void m_zero(struct mbuf *); > > +struct mutex m_pool_mtx = MUTEX_INITIALIZER(IPL_NET); > +unsigned int mbuf_mem_limit; /* how much memory can we allocated */ > +unsigned int mbuf_mem_alloc; /* how much memory has been allocated */ > + > +void *m_pool_alloc(struct pool *, int, int *); > +void m_pool_free(struct pool *, void *); > + > +struct pool_allocator m_pool_allocator = { > + m_pool_alloc, > + m_pool_free, > + 0 /* will be copied from pool_allocator_multi */ > +}; > + > static void (*mextfree_fns[4])(caddr_t, u_int, void *); > static u_int num_extfree_fns; > > @@ -148,6 +161,11 @@ mbinit(void) > int i; > unsigned int lowbits; > > + m_pool_allocator.pa_pagesz = pool_allocator_multi.pa_pagesz; > + > + nmbclust_update(); > + mbuf_mem_alloc = 0; > + > #if DIAGNOSTIC > if (mclsizes[0] != MCLBYTES) > panic("mbinit: the smallest cluster size != MCLBYTES"); > @@ -155,9 +173,7 @@ mbinit(void) > panic("mbinit: the largest cluster size != MAXMCLBYTES"); > #endif > > - pool_init(&mbpool, MSIZE, 0, IPL_NET, 0, "mbufpl", NULL); > - pool_set_constraints(&mbpool, &kp_dma_contig); > - pool_setlowat(&mbpool, mblowat); > + m_pool_init(&mbpool, MSIZE, 64, "mbufpl"); > > pool_init(&mtagpool, PACKET_TAG_MAXSIZE + sizeof(struct m_tag), 0, > IPL_NET, 0, "mtagpl", NULL); > @@ -171,47 +187,32 @@ mbinit(void) > snprintf(mclnames[i], sizeof(mclnames[0]), "mcl%dk", > mclsizes[i] >> 10); > } > - pool_init(&mclpools[i], mclsizes[i], 64, IPL_NET, 0, > - mclnames[i], NULL); > - pool_set_constraints(&mclpools[i], &kp_dma_contig); > - pool_setlowat(&mclpools[i], mcllowat); > + > + m_pool_init(&mclpools[i], mclsizes[i], 64, mclnames[i]); > 
} > > (void)mextfree_register(m_extfree_pool); > KASSERT(num_extfree_fns == 1); > - > - nmbclust_update(); > } > > void > mbcpuinit() > { > + int i; > + > mbstat = counters_alloc_ncpus(mbstat, MBSTAT_COUNT, M_DEVBUF); > + > + pool_cache_init(&mbpool); > + pool_cache_init(&mtagpool); > + > + for (i = 0; i < nitems(mclsizes); i++) > + pool_cache_init(&mclpools[i]); > } > > void > nmbclust_update(void) > { > - unsigned int i, n; > - > - /* > - * Set the hard limit on the mclpools to the number of > - * mbuf clusters the kernel is to support. Log the limit > - * reached message max once a minute. > - */ > - for (i = 0; i < nitems(mclsizes); i++) { > - n = (unsigned long long)nmbclust * MCLBYTES / mclsizes[i]; > - (void)pool_sethardlimit(&mclpools[i], n, mclpool_warnmsg, 60); > - /* > - * XXX this needs to be reconsidered. > - * Setting the high water mark to nmbclust is too high > - * but we need to have enough spare buffers around so that > - * allocations in interrupt context don't fail or mclgeti() > - * drivers may end up with empty rings. 
> - */ > - pool_sethiwat(&mclpools[i], n); > - } > - pool_sethiwat(&mbpool, nmbclust); > + mbuf_mem_limit = nmbclust * MCLBYTES; > } > > /* > @@ -1377,6 +1378,52 @@ m_dup_pkt(struct mbuf *m0, unsigned int > fail: > m_freem(m); > return (NULL); > +} > + > +void * > +m_pool_alloc(struct pool *pp, int flags, int *slowdown) > +{ > + void *v = NULL; > + int avail = 1; > + > + if (mbuf_mem_alloc + pp->pr_pgsize > mbuf_mem_limit) > + return (NULL); > + > + mtx_enter(&m_pool_mtx); > + if (mbuf_mem_alloc + pp->pr_pgsize > mbuf_mem_limit) > + avail = 0; > + else > + mbuf_mem_alloc += pp->pr_pgsize; > + mtx_leave(&m_pool_mtx); > + > + if (avail) { > + v = (*pool_allocator_multi.pa_alloc)(pp, flags, slowdown); > + > + if (v == NULL) { > + mtx_enter(&m_pool_mtx); > + mbuf_mem_alloc -= pp->pr_pgsize; > + mtx_leave(&m_pool_mtx); > + } > + } > + > + return (v); > +} > + > +void > +m_pool_free(struct pool *pp, void *v) > +{ > + (*pool_allocator_multi.pa_free)(pp, v); > + > + mtx_enter(&m_pool_mtx); > + mbuf_mem_alloc -= pp->pr_pgsize; > + mtx_leave(&m_pool_mtx); > +} > + > +void > +m_pool_init(struct pool *pp, u_int size, u_int align, const char *wmesg) > +{ > + pool_init(pp, size, align, IPL_NET, 0, wmesg, &m_pool_allocator); > + pool_set_constraints(pp, &kp_dma_contig); > } > > #ifdef DDB > Index: dev/pci/if_myx.c > =================================================================== > RCS file: /cvs/src/sys/dev/pci/if_myx.c,v > retrieving revision 1.99 > diff -u -p -r1.99 if_myx.c > --- dev/pci/if_myx.c 31 Oct 2016 01:38:57 -0000 1.99 > +++ dev/pci/if_myx.c 22 Nov 2016 02:31:47 -0000 > @@ -294,8 +294,6 @@ myx_attach(struct device *parent, struct > > /* this is sort of racy */ > if (myx_mcl_pool == NULL) { > - extern struct kmem_pa_mode kp_dma_contig; > - > myx_mcl_pool = malloc(sizeof(*myx_mcl_pool), M_DEVBUF, > M_WAITOK); > if (myx_mcl_pool == NULL) { > @@ -303,9 +301,9 @@ myx_attach(struct device *parent, struct > DEVNAME(sc)); > goto unmap; > } > - pool_init(myx_mcl_pool, 
MYX_RXBIG_SIZE, MYX_BOUNDARY, IPL_NET, > - 0, "myxmcl", NULL); > - pool_set_constraints(myx_mcl_pool, &kp_dma_contig); > + > + m_pool_init(myx_mcl_pool, MYX_RXBIG_SIZE, MYX_BOUNDARY, > + "myxmcl"); > } > > if (myx_pcie_dc(sc, pa) != 0) > >