right now the pools that back mbufs and clusters are each limited
individually.

the following diff instead gives the mbuf layer one global limit
on the amount of memory that can be allocated to all of these
pools. this is enforced by wrapping the multi-page pool allocator
with an allocator that checks the mbuf memory limit first.
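
to illustrate the shape of that wrapper outside the kernel, here's a
minimal userland sketch of the same pattern. the names (limited_alloc,
limited_free, mem_limit, etc) are made up for the example; the real
versions are m_pool_alloc and m_pool_free in the diff below:

/*
 * illustrative userland sketch only, not the kernel code: account
 * allocations against one shared byte limit before calling through
 * to the underlying allocator.
 */
#include <pthread.h>
#include <stdlib.h>

static pthread_mutex_t mem_mtx = PTHREAD_MUTEX_INITIALIZER;
static size_t mem_limit = 8UL * 1024 * 1024;    /* shared cap */
static size_t mem_alloc;                        /* bytes reserved */

void *
limited_alloc(size_t sz)
{
        void *v;
        int ok = 0;

        /* reserve sz bytes against the shared limit */
        pthread_mutex_lock(&mem_mtx);
        if (mem_alloc + sz <= mem_limit) {
                mem_alloc += sz;
                ok = 1;
        }
        pthread_mutex_unlock(&mem_mtx);

        if (!ok)
                return (NULL);          /* over the shared limit */

        v = malloc(sz);                 /* the underlying allocator */
        if (v == NULL) {
                /* allocation failed, give the reservation back */
                pthread_mutex_lock(&mem_mtx);
                mem_alloc -= sz;
                pthread_mutex_unlock(&mem_mtx);
        }

        return (v);
}

void
limited_free(void *v, size_t sz)
{
        free(v);

        pthread_mutex_lock(&mem_mtx);
        mem_alloc -= sz;
        pthread_mutex_unlock(&mem_mtx);
}

the important bit is that the reservation is taken before the real
allocator runs, and handed back if it fails, so the accounting can
never go over the limit.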

this means mbuf memory as a whole is capped at 2k * nmbclust bytes,
instead of each pool being able to use that much on its own.
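
for example, if nmbclust were 4096, the shared cap across mbufpl and
all of the mclNk pools would be 4096 * 2k = 8M.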

ok?

Index: sys/pool.h
===================================================================
RCS file: /cvs/src/sys/sys/pool.h,v
retrieving revision 1.68
diff -u -p -r1.68 pool.h
--- sys/pool.h  21 Nov 2016 01:44:06 -0000      1.68
+++ sys/pool.h  22 Nov 2016 02:31:47 -0000
@@ -205,6 +205,7 @@ struct pool {
 #ifdef _KERNEL
 
 extern struct pool_allocator pool_allocator_single;
+extern struct pool_allocator pool_allocator_multi;
 
 struct pool_request {
        TAILQ_ENTRY(pool_request) pr_entry;
Index: sys/mbuf.h
===================================================================
RCS file: /cvs/src/sys/sys/mbuf.h,v
retrieving revision 1.222
diff -u -p -r1.222 mbuf.h
--- sys/mbuf.h  24 Oct 2016 04:38:44 -0000      1.222
+++ sys/mbuf.h  22 Nov 2016 02:31:47 -0000
@@ -416,6 +416,7 @@ struct mbuf_queue {
 };
 
 #ifdef _KERNEL
+struct pool;
 
 extern int nmbclust;                   /* limit on the # of clusters */
 extern int mblowat;                    /* mbuf low water mark */
@@ -444,6 +445,7 @@ int m_leadingspace(struct mbuf *);
 int    m_trailingspace(struct mbuf *);
 struct mbuf *m_clget(struct mbuf *, int, u_int);
 void   m_extref(struct mbuf *, struct mbuf *);
+void   m_pool_init(struct pool *, u_int, u_int, const char *);
 void   m_extfree_pool(caddr_t, u_int, void *);
 void   m_adj(struct mbuf *, int);
 int    m_copyback(struct mbuf *, int, int, const void *, int);
Index: kern/uipc_mbuf.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_mbuf.c,v
retrieving revision 1.238
diff -u -p -r1.238 uipc_mbuf.c
--- kern/uipc_mbuf.c    9 Nov 2016 08:55:11 -0000       1.238
+++ kern/uipc_mbuf.c    22 Nov 2016 02:31:47 -0000
@@ -133,6 +133,19 @@ void       m_extfree(struct mbuf *);
 void   nmbclust_update(void);
 void   m_zero(struct mbuf *);
 
+struct mutex m_pool_mtx = MUTEX_INITIALIZER(IPL_NET);
+unsigned int mbuf_mem_limit; /* how much memory we can allocate */
+unsigned int mbuf_mem_alloc; /* how much memory has been allocated */
+
+void   *m_pool_alloc(struct pool *, int, int *);
+void   m_pool_free(struct pool *, void *);
+
+struct pool_allocator m_pool_allocator = {
+       m_pool_alloc,
+       m_pool_free,
+       0 /* will be copied from pool_allocator_multi */
+};
+
 static void (*mextfree_fns[4])(caddr_t, u_int, void *);
 static u_int num_extfree_fns;
 
@@ -148,6 +161,11 @@ mbinit(void)
        int i;
        unsigned int lowbits;
 
+       m_pool_allocator.pa_pagesz = pool_allocator_multi.pa_pagesz;
+
+       nmbclust_update();
+       mbuf_mem_alloc = 0;
+
 #if DIAGNOSTIC
        if (mclsizes[0] != MCLBYTES)
                panic("mbinit: the smallest cluster size != MCLBYTES");
@@ -155,9 +173,7 @@ mbinit(void)
                panic("mbinit: the largest cluster size != MAXMCLBYTES");
 #endif
 
-       pool_init(&mbpool, MSIZE, 0, IPL_NET, 0, "mbufpl", NULL);
-       pool_set_constraints(&mbpool, &kp_dma_contig);
-       pool_setlowat(&mbpool, mblowat);
+       m_pool_init(&mbpool, MSIZE, 64, "mbufpl");
 
        pool_init(&mtagpool, PACKET_TAG_MAXSIZE + sizeof(struct m_tag), 0,
            IPL_NET, 0, "mtagpl", NULL);
@@ -171,47 +187,32 @@ mbinit(void)
                        snprintf(mclnames[i], sizeof(mclnames[0]), "mcl%dk",
                            mclsizes[i] >> 10);
                }
-               pool_init(&mclpools[i], mclsizes[i], 64, IPL_NET, 0,
-                   mclnames[i], NULL);
-               pool_set_constraints(&mclpools[i], &kp_dma_contig);
-               pool_setlowat(&mclpools[i], mcllowat);
+
+               m_pool_init(&mclpools[i], mclsizes[i], 64, mclnames[i]);
        }
 
        (void)mextfree_register(m_extfree_pool);
        KASSERT(num_extfree_fns == 1);
-
-       nmbclust_update();
 }
 
 void
 mbcpuinit()
 {
+       int i;
+
        mbstat = counters_alloc_ncpus(mbstat, MBSTAT_COUNT, M_DEVBUF);
+
+       pool_cache_init(&mbpool);
+       pool_cache_init(&mtagpool);
+
+       for (i = 0; i < nitems(mclsizes); i++)
+               pool_cache_init(&mclpools[i]);
 }
 
 void
 nmbclust_update(void)
 {
-       unsigned int i, n;
-
-       /*
-        * Set the hard limit on the mclpools to the number of
-        * mbuf clusters the kernel is to support.  Log the limit
-        * reached message max once a minute.
-        */
-       for (i = 0; i < nitems(mclsizes); i++) {
-               n = (unsigned long long)nmbclust * MCLBYTES / mclsizes[i];
-               (void)pool_sethardlimit(&mclpools[i], n, mclpool_warnmsg, 60);
-               /*
-                * XXX this needs to be reconsidered.
-                * Setting the high water mark to nmbclust is too high
-                * but we need to have enough spare buffers around so that
-                * allocations in interrupt context don't fail or mclgeti()
-                * drivers may end up with empty rings.
-                */
-               pool_sethiwat(&mclpools[i], n);
-       }
-       pool_sethiwat(&mbpool, nmbclust);
+       mbuf_mem_limit = nmbclust * MCLBYTES;
 }
 
 /*
@@ -1377,6 +1378,52 @@ m_dup_pkt(struct mbuf *m0, unsigned int 
 fail:
        m_freem(m);
        return (NULL);
+}
+
+void *
+m_pool_alloc(struct pool *pp, int flags, int *slowdown)
+{
+       void *v = NULL;
+       int avail = 1;
+
+       if (mbuf_mem_alloc + pp->pr_pgsize > mbuf_mem_limit)
+               return (NULL);
+
+       mtx_enter(&m_pool_mtx);
+       if (mbuf_mem_alloc + pp->pr_pgsize > mbuf_mem_limit)
+               avail = 0;
+       else
+               mbuf_mem_alloc += pp->pr_pgsize;
+       mtx_leave(&m_pool_mtx);
+
+       if (avail) {
+               v = (*pool_allocator_multi.pa_alloc)(pp, flags, slowdown);
+
+               if (v == NULL) {
+                       mtx_enter(&m_pool_mtx);
+                       mbuf_mem_alloc -= pp->pr_pgsize;
+                       mtx_leave(&m_pool_mtx);
+               }
+       }
+
+       return (v);
+}
+
+void
+m_pool_free(struct pool *pp, void *v)
+{
+       (*pool_allocator_multi.pa_free)(pp, v);
+
+       mtx_enter(&m_pool_mtx);
+       mbuf_mem_alloc -= pp->pr_pgsize;
+       mtx_leave(&m_pool_mtx);
+}
+
+void
+m_pool_init(struct pool *pp, u_int size, u_int align, const char *wmesg)
+{
+       pool_init(pp, size, align, IPL_NET, 0, wmesg, &m_pool_allocator);
+       pool_set_constraints(pp, &kp_dma_contig);
 }
 
 #ifdef DDB
Index: dev/pci/if_myx.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/if_myx.c,v
retrieving revision 1.99
diff -u -p -r1.99 if_myx.c
--- dev/pci/if_myx.c    31 Oct 2016 01:38:57 -0000      1.99
+++ dev/pci/if_myx.c    22 Nov 2016 02:31:47 -0000
@@ -294,8 +294,6 @@ myx_attach(struct device *parent, struct
 
        /* this is sort of racy */
        if (myx_mcl_pool == NULL) {
-               extern struct kmem_pa_mode kp_dma_contig;
-
                myx_mcl_pool = malloc(sizeof(*myx_mcl_pool), M_DEVBUF,
                    M_WAITOK);
                if (myx_mcl_pool == NULL) {
@@ -303,9 +301,9 @@ myx_attach(struct device *parent, struct
                            DEVNAME(sc));
                        goto unmap;
                }
-               pool_init(myx_mcl_pool, MYX_RXBIG_SIZE, MYX_BOUNDARY, IPL_NET,
-                   0, "myxmcl", NULL);
-               pool_set_constraints(myx_mcl_pool, &kp_dma_contig);
+
+               m_pool_init(myx_mcl_pool, MYX_RXBIG_SIZE, MYX_BOUNDARY,
+                   "myxmcl");
        }
 
        if (myx_pcie_dc(sc, pa) != 0)
