the diff below adds cleanup of idle lists in the cpu caches in pools.

the caches on the cpus move lists of items around instead of
individual items. these lists are moved to the global pool struct
and accumulate there. if you get a burst of work in the pool (e.g.,
you use a lot of mbufs for a short period) you'll end up with a lot
of lists in the global pool struct that don't get used.

we gc idle pages in vanilla pools and return them to the system,
so this extends that to the cache subsystem.

it does this by timestamping the global list of lists when it has
been empty, and if it hasn't been empty for a while it returns a
list to the pages where they can be gc'ed back to the page allocator.
if the items in a pool are being allocated in bursts over relatively
short periods, e.g., think replenishing rx rings on a nic in rate
limited interrupts, then the global lists will be emptied regularly. if we
stop rxing packets, then we won't empty the lists and therefore won't
re-timestamp them, which makes them available for gc.

care has to be taken when moving items on a cache list back to the
pages so the puts aren't counted twice (once when the item is put
on a free list on a cpu cache, and again when moving from the list
back to the pages), so this splits pool_put up. pool_do_put is
solely responsible for putting items back into pool pages. this is
then called from pool_put (which still does the accounting for
normal pools) and the list gc. a neat side effect of this is that
the list gc can return multiple items to the pages while only taking
the lock around the page structures once.

i wrote this in feb, and it's been running solidly for me ever
since. i wanted to implement the systat bits before putting it in
though.

the last big pool cache chunk after this is growing the size of the
cache lists based on contention.

ok?

Index: sys/pool.h
===================================================================
RCS file: /cvs/src/sys/sys/pool.h,v
retrieving revision 1.70
diff -u -p -r1.70 pool.h
--- sys/pool.h  15 Jun 2017 02:52:30 -0000      1.70
+++ sys/pool.h  15 Jun 2017 11:28:31 -0000
@@ -185,11 +185,13 @@ struct pool {
        unsigned long   pr_cache_magic[2];
        struct mutex    pr_cache_mtx;
        struct pool_cache_lists
-                       pr_cache_lists;
-       u_int           pr_cache_nlist; /* # of lists */
+                       pr_cache_lists; /* list of idle item lists */
+       u_int           pr_cache_nlist; /* # of idle lists */
        u_int           pr_cache_items; /* target list length */
        u_int           pr_cache_contention;
+       int             pr_cache_tick;  /* time idle list was empty */
        int             pr_cache_nout;
+       uint64_t        pr_cache_ngc;   /* # of times the gc released a list */
 
        u_int           pr_align;
        u_int           pr_maxcolors;   /* Cache coloring */
Index: kern/subr_pool.c
===================================================================
RCS file: /cvs/src/sys/kern/subr_pool.c,v
retrieving revision 1.212
diff -u -p -r1.212 subr_pool.c
--- kern/subr_pool.c    15 Jun 2017 03:50:50 -0000      1.212
+++ kern/subr_pool.c    15 Jun 2017 11:28:31 -0000
@@ -1,4 +1,4 @@
-/*     $OpenBSD: subr_pool.c,v 1.212 2017/06/15 03:50:50 dlg Exp $     */
+/*     $OpenBSD: subr_pool.c,v 1.211 2017/06/15 03:48:50 dlg Exp $     */
 /*     $NetBSD: subr_pool.c,v 1.61 2001/09/26 07:14:56 chs Exp $       */
 
 /*-
@@ -135,6 +135,7 @@ struct pool_cache {
 void   *pool_cache_get(struct pool *);
 void    pool_cache_put(struct pool *, void *);
 void    pool_cache_destroy(struct pool *);
+void    pool_cache_gc(struct pool *);
 #endif
 void    pool_cache_pool_info(struct pool *, struct kinfo_pool *);
 int     pool_cache_info(struct pool *, void *, size_t *);
@@ -156,6 +157,7 @@ void         pool_p_free(struct pool *, struct 
 
 void    pool_update_curpage(struct pool *);
 void   *pool_do_get(struct pool *, int, int *);
+void    pool_do_put(struct pool *, void *);
 int     pool_chk_page(struct pool *, struct pool_page_header *, int);
 int     pool_chk(struct pool *);
 void    pool_get_done(void *, void *);
@@ -711,7 +713,6 @@ pool_do_get(struct pool *pp, int flags, 
 void
 pool_put(struct pool *pp, void *v)
 {
-       struct pool_item *pi = v;
        struct pool_page_header *ph, *freeph = NULL;
 
 #ifdef DIAGNOSTIC
@@ -728,6 +729,37 @@ pool_put(struct pool *pp, void *v)
 
        mtx_enter(&pp->pr_mtx);
 
+       pool_do_put(pp, v);
+
+       pp->pr_nout--;
+       pp->pr_nput++;
+
+       /* is it time to free a page? */
+       if (pp->pr_nidle > pp->pr_maxpages &&
+           (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
+           (ticks - ph->ph_tick) > (hz * pool_wait_free)) {
+               freeph = ph;
+               pool_p_remove(pp, freeph);
+       }
+
+       mtx_leave(&pp->pr_mtx);
+
+       if (freeph != NULL)
+               pool_p_free(pp, freeph);
+
+       if (!TAILQ_EMPTY(&pp->pr_requests)) {
+               mtx_enter(&pp->pr_requests_mtx);
+               pool_runqueue(pp, PR_NOWAIT);
+               mtx_leave(&pp->pr_requests_mtx);
+       }
+}
+
+void
+pool_do_put(struct pool *pp, void *v)
+{
+       struct pool_item *pi = v;
+       struct pool_page_header *ph;
+
        splassert(pp->pr_ipl);
 
        ph = pr_find_pagehead(pp, v);
@@ -771,27 +803,6 @@ pool_put(struct pool *pp, void *v)
                TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_entry);
                pool_update_curpage(pp);
        }
-
-       pp->pr_nout--;
-       pp->pr_nput++;
-
-       /* is it time to free a page? */
-       if (pp->pr_nidle > pp->pr_maxpages &&
-           (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
-           (ticks - ph->ph_tick) > (hz * pool_wait_free)) {
-               freeph = ph;
-               pool_p_remove(pp, freeph);
-       }
-       mtx_leave(&pp->pr_mtx);
-
-       if (freeph != NULL)
-               pool_p_free(pp, freeph);
-
-       if (!TAILQ_EMPTY(&pp->pr_requests)) {
-               mtx_enter(&pp->pr_requests_mtx);
-               pool_runqueue(pp, PR_NOWAIT);
-               mtx_leave(&pp->pr_requests_mtx);
-       }
 }
 
 /*
@@ -1466,6 +1477,11 @@ pool_gc_pages(void *null)
        rw_enter_read(&pool_lock);
        s = splvm(); /* XXX go to splvm until all pools _setipl properly */
        SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
+#ifdef MULTIPROCESSOR
+               if (pp->pr_cache != NULL)
+                       pool_cache_gc(pp);
+#endif
+
                if (pp->pr_nidle <= pp->pr_minpages || /* guess */
                    !mtx_enter_try(&pp->pr_mtx)) /* try */
                        continue;
@@ -1632,8 +1648,10 @@ pool_cache_init(struct pool *pp)
        arc4random_buf(pp->pr_cache_magic, sizeof(pp->pr_cache_magic));
        TAILQ_INIT(&pp->pr_cache_lists);
        pp->pr_cache_nlist = 0;
+       pp->pr_cache_tick = ticks;
        pp->pr_cache_items = 8;
        pp->pr_cache_contention = 0;
+       pp->pr_cache_ngc = 0;
 
        CPUMEM_FOREACH(pc, &i, cm) {
                pc->pc_actv = NULL;
@@ -1649,6 +1667,8 @@ pool_cache_init(struct pool *pp)
                pc->pc_nout = 0;
        }
 
+       membar_producer();
+
        pp->pr_cache = cm;
 }
 
@@ -1730,6 +1750,9 @@ pool_cache_list_free(struct pool *pp, st
     struct pool_cache_item *ci)
 {
        pool_list_enter(pp);
+       if (TAILQ_EMPTY(&pp->pr_cache_lists))
+               pp->pr_cache_tick = ticks;
+
        TAILQ_INSERT_TAIL(&pp->pr_cache_lists, ci, ci_nextl);
        pp->pr_cache_nlist++;
 
@@ -1864,11 +1887,13 @@ pool_cache_list_put(struct pool *pp, str
 
        rpl = TAILQ_NEXT(pl, ci_nextl);
 
+       mtx_enter(&pp->pr_mtx);
        do {
                next = pl->ci_next;
-               pool_put(pp, pl);
+               pool_do_put(pp, pl);
                pl = next;
        } while (pl != NULL);
+       mtx_leave(&pp->pr_mtx);
 
        return (rpl);
 }
@@ -1881,8 +1906,10 @@ pool_cache_destroy(struct pool *pp)
        struct cpumem_iter i;
        struct cpumem *cm;
 
+       rw_enter_write(&pool_lock); /* serialise with the gc */
        cm = pp->pr_cache;
        pp->pr_cache = NULL; /* make pool_put avoid the cache */
+       rw_exit_write(&pool_lock);
 
        CPUMEM_FOREACH(pc, &i, cm) {
                pool_cache_list_put(pp, pc->pc_actv);
@@ -1897,6 +1924,29 @@ pool_cache_destroy(struct pool *pp)
 }
 
 void
+pool_cache_gc(struct pool *pp)
+{
+       if ((ticks - pp->pr_cache_tick) > (hz * pool_wait_gc) &&
+           !TAILQ_EMPTY(&pp->pr_cache_lists) &&
+           mtx_enter_try(&pp->pr_cache_mtx)) {
+               struct pool_cache_item *pl = NULL;
+
+               pl = TAILQ_FIRST(&pp->pr_cache_lists);
+               if (pl != NULL) {
+                       TAILQ_REMOVE(&pp->pr_cache_lists, pl, ci_nextl);
+                       pp->pr_cache_tick = ticks;
+                       pp->pr_cache_nlist--;
+
+                       pp->pr_cache_ngc++;
+               }
+
+               mtx_leave(&pp->pr_cache_mtx);
+
+               pool_cache_list_put(pp, pl);
+       }
+}
+
+void
 pool_cache_pool_info(struct pool *pp, struct kinfo_pool *pi)
 {
        struct pool_cache *pc;
@@ -1943,7 +1993,7 @@ pool_cache_info(struct pool *pp, void *o
        memset(&kpc, 0, sizeof(kpc)); /* don't leak padding */
 
        mtx_enter(&pp->pr_cache_mtx);
-       kpc.pr_ngc = 0; /* notyet */
+       kpc.pr_ngc = pp->pr_cache_ngc;
        kpc.pr_len = pp->pr_cache_items;
        kpc.pr_nlist = pp->pr_cache_nlist;
        kpc.pr_contention = pp->pr_cache_contention;

Reply via email to