The diff below adds a kernel thread that makes memory pages filled
with zeroes without holding the kernel lock.  The idea is that this
should speed up MP systems because the kernel can do some useful work
in parallel with other things, and could lower the latency on all
systems because (userland) memory page allocation will be faster.  The
thread runs at the absolutely lowest priority such that we only run it
if we don't have anything else to do.

But this could also slow down some systems, because zeroing
pages can thrash the caches in your system.

So I'd like to see this diff tested on a wide variety of systems, and
hear back from people how this diff affects their OpenBSD systems.
Subjective reports are ok; hard benchmarks are better.

One thing to look at when you're running this diff is the output of
"vmstat -s".  It will report something like:

    2146380 zeroed page hits
      34258 zeroed page misses

You want the number of hits to be significantly larger than the number
of misses.  And you want the number of hits to keep growing.  Another
thing to look at is "systat vm".  If you make your screen a bit bigger
than 25 lines, you'll see a "pzidle" counter which indicates how many
zeroed pages have been consumed (negative) or created (positive).

Thanks,

Mark

Index: uvm/uvm_extern.h
===================================================================
RCS file: /home/cvs/src/sys/uvm/uvm_extern.h,v
retrieving revision 1.119
diff -u -p -r1.119 uvm_extern.h
--- uvm/uvm_extern.h    11 Jul 2014 16:35:40 -0000      1.119
+++ uvm/uvm_extern.h    12 Jul 2014 19:02:23 -0000
@@ -519,6 +519,7 @@ void                        uvm_vnp_sync(struct mount *);
 void                   uvm_vnp_terminate(struct vnode *);
 boolean_t              uvm_vnp_uncache(struct vnode *);
 struct uvm_object      *uvn_attach(struct vnode *, vm_prot_t);
+void                   uvm_pagezero_thread(void *);
 void                   kmeminit_nkmempages(void);
 void                   kmeminit(void);
 extern u_int           nkmempages;
Index: uvm/uvm_page.h
===================================================================
RCS file: /home/cvs/src/sys/uvm/uvm_page.h,v
retrieving revision 1.54
diff -u -p -r1.54 uvm_page.h
--- uvm/uvm_page.h      11 Jul 2014 16:35:40 -0000      1.54
+++ uvm/uvm_page.h      12 Jul 2014 19:02:23 -0000
@@ -296,7 +296,7 @@ int         vm_physseg_find(paddr_t, int *);
 #define uvm_lock_fpageq()      mtx_enter(&uvm.fpageqlock);
 #define uvm_unlock_fpageq()    mtx_leave(&uvm.fpageqlock);
 
-#define        UVM_PAGEZERO_TARGET     (uvmexp.free)
+#define        UVM_PAGEZERO_TARGET     (uvmexp.free / 8)
 
 #define VM_PAGE_TO_PHYS(entry) ((entry)->phys_addr)
 
Index: uvm/uvm_pmemrange.c
===================================================================
RCS file: /home/cvs/src/sys/uvm/uvm_pmemrange.c,v
retrieving revision 1.41
diff -u -p -r1.41 uvm_pmemrange.c
--- uvm/uvm_pmemrange.c 14 Sep 2014 14:17:27 -0000      1.41
+++ uvm/uvm_pmemrange.c 24 Sep 2014 15:29:00 -0000
@@ -21,6 +21,7 @@
 #include <uvm/uvm.h>
 #include <sys/malloc.h>
 #include <sys/kernel.h>
+#include <sys/kthread.h>
 #include <sys/mount.h>
 
 /*
@@ -107,7 +108,7 @@ void        uvm_pmr_assertvalid(struct uvm_pmem
 #endif
 
 int                     uvm_pmr_get1page(psize_t, int, struct pglist *,
-                           paddr_t, paddr_t);
+                           paddr_t, paddr_t, int);
 
 struct uvm_pmemrange   *uvm_pmr_allocpmr(void);
 struct vm_page         *uvm_pmr_nfindsz(struct uvm_pmemrange *, psize_t, int);
@@ -824,7 +825,7 @@ retry_desperate:
        if (count <= maxseg && align == 1 && boundary == 0 &&
            (flags & UVM_PLA_TRYCONTIG) == 0) {
                fcount += uvm_pmr_get1page(count - fcount, memtype_init,
-                   result, start, end);
+                   result, start, end, 0);
 
                /*
                 * If we found sufficient pages, go to the succes exit code.
@@ -1036,6 +1037,8 @@ out:
 
                if (found->pg_flags & PG_ZERO) {
                        uvmexp.zeropages--;
+                       if (uvmexp.zeropages < UVM_PAGEZERO_TARGET)
+                               wakeup(&uvmexp.zeropages);
                }
                if (flags & UVM_PLA_ZERO) {
                        if (found->pg_flags & PG_ZERO)
@@ -1130,6 +1133,8 @@ uvm_pmr_freepages(struct vm_page *pg, ps
                pg += pmr_count;
        }
        wakeup(&uvmexp.free);
+       if (uvmexp.zeropages < UVM_PAGEZERO_TARGET)
+               wakeup(&uvmexp.zeropages);
 
        uvm_wakeup_pla(VM_PAGE_TO_PHYS(firstpg), ptoa(count));
 
@@ -1167,6 +1172,8 @@ uvm_pmr_freepageq(struct pglist *pgl)
                uvm_wakeup_pla(pstart, ptoa(plen));
        }
        wakeup(&uvmexp.free);
+       if (uvmexp.zeropages < UVM_PAGEZERO_TARGET)
+               wakeup(&uvmexp.zeropages);
        uvm_unlock_fpageq();
 
        return;
@@ -1663,7 +1670,7 @@ uvm_pmr_rootupdate(struct uvm_pmemrange 
  */
 int
 uvm_pmr_get1page(psize_t count, int memtype_init, struct pglist *result,
-    paddr_t start, paddr_t end)
+    paddr_t start, paddr_t end, int memtype_only)
 {
        struct  uvm_pmemrange *pmr;
        struct  vm_page *found, *splitpg;
@@ -1779,6 +1786,8 @@ uvm_pmr_get1page(psize_t count, int memt
                                uvm_pmr_remove_addr(pmr, found);
                                uvm_pmr_assertvalid(pmr);
                        } else {
+                               if (memtype_only)
+                                       break;
                                /*
                                 * Skip to the next memtype.
                                 */
@@ -1941,5 +1950,42 @@ uvm_wakeup_pla(paddr_t low, psize_t len)
                                wakeup(pma);
                        }
                }
+       }
+}
+
+void
+uvm_pagezero_thread(void *arg)
+{
+       struct pglist pgl;
+       struct vm_page *pg;
+       int count;
+
+       /* Run at the lowest possible priority. */
+       curproc->p_p->ps_nice = NZERO + PRIO_MAX;
+
+       KERNEL_UNLOCK();
+
+       for (;;) {
+               uvm_lock_fpageq();
+               while (uvmexp.zeropages >= UVM_PAGEZERO_TARGET ||
+                   (count = uvm_pmr_get1page(16, UVM_PMR_MEMTYPE_DIRTY,
+                    &pgl, 0, 0, 1)) == 0) {
+                       msleep(&uvmexp.zeropages, &uvm.fpageqlock, MAXPRI,
+                           "pgzero", 0);
+               }
+               uvm_unlock_fpageq();
+
+               TAILQ_FOREACH(pg, &pgl, pageq) {
+                       uvm_pagezero(pg);
+                       atomic_setbits_int(&pg->pg_flags, PG_ZERO);
+               }
+
+               uvm_lock_fpageq();
+               while (!TAILQ_EMPTY(&pgl))
+                       uvm_pmr_remove_1strange(&pgl, 0, NULL, 0);
+               uvmexp.zeropages += count;
+               uvm_unlock_fpageq();
+
+               yield();
        }
 }

Reply via email to