The diff below adds a kernel thread that makes memory pages filled with zeroes without holding the kernel lock. The idea is that this should speed up MP systems because the kernel can do some useful work in parallel with other things, and could lower the latency on all systems because (userland) memory page allocation will be faster. The thread runs at the absolutely lowest priority such that we only run it if we don't have anything else to do.
But this could also slow down some systems though, because zeroing pages can thrash the caches in your system. So I'd like to see this diff tested on a wide variety of systems, and hear back from people how this diff affects their OpenBSD systems. Subjective reports are ok; hard benchmarks are better. One thing to look at when you're running this diff is the output of "vmstat -s". It will report something like: 2146380 zeroed page hits 34258 zeroed page misses You want the number of hits to be significantly larger than the number of misses. And you want the number of hits to keep growing. Another thing to look at is "systat vm". If you make your screen a bit bigger than 25 lines, you'll see a "pzidle" counter which indicates how many zeroed pages have been consumed (negative) or created (positive). Thanks, Mark Index: uvm/uvm_extern.h =================================================================== RCS file: /home/cvs/src/sys/uvm/uvm_extern.h,v retrieving revision 1.119 diff -u -p -r1.119 uvm_extern.h --- uvm/uvm_extern.h 11 Jul 2014 16:35:40 -0000 1.119 +++ uvm/uvm_extern.h 12 Jul 2014 19:02:23 -0000 @@ -519,6 +519,7 @@ void uvm_vnp_sync(struct mount *); void uvm_vnp_terminate(struct vnode *); boolean_t uvm_vnp_uncache(struct vnode *); struct uvm_object *uvn_attach(struct vnode *, vm_prot_t); +void uvm_pagezero_thread(void *); void kmeminit_nkmempages(void); void kmeminit(void); extern u_int nkmempages; Index: uvm/uvm_page.h =================================================================== RCS file: /home/cvs/src/sys/uvm/uvm_page.h,v retrieving revision 1.54 diff -u -p -r1.54 uvm_page.h --- uvm/uvm_page.h 11 Jul 2014 16:35:40 -0000 1.54 +++ uvm/uvm_page.h 12 Jul 2014 19:02:23 -0000 @@ -296,7 +296,7 @@ int vm_physseg_find(paddr_t, int *); #define uvm_lock_fpageq() mtx_enter(&uvm.fpageqlock); #define uvm_unlock_fpageq() mtx_leave(&uvm.fpageqlock); -#define UVM_PAGEZERO_TARGET (uvmexp.free) +#define UVM_PAGEZERO_TARGET (uvmexp.free / 8) #define 
VM_PAGE_TO_PHYS(entry) ((entry)->phys_addr) Index: uvm/uvm_pmemrange.c =================================================================== RCS file: /home/cvs/src/sys/uvm/uvm_pmemrange.c,v retrieving revision 1.41 diff -u -p -r1.41 uvm_pmemrange.c --- uvm/uvm_pmemrange.c 14 Sep 2014 14:17:27 -0000 1.41 +++ uvm/uvm_pmemrange.c 24 Sep 2014 15:29:00 -0000 @@ -21,6 +21,7 @@ #include <uvm/uvm.h> #include <sys/malloc.h> #include <sys/kernel.h> +#include <sys/kthread.h> #include <sys/mount.h> /* @@ -107,7 +108,7 @@ void uvm_pmr_assertvalid(struct uvm_pmem #endif int uvm_pmr_get1page(psize_t, int, struct pglist *, - paddr_t, paddr_t); + paddr_t, paddr_t, int); struct uvm_pmemrange *uvm_pmr_allocpmr(void); struct vm_page *uvm_pmr_nfindsz(struct uvm_pmemrange *, psize_t, int); @@ -824,7 +825,7 @@ retry_desperate: if (count <= maxseg && align == 1 && boundary == 0 && (flags & UVM_PLA_TRYCONTIG) == 0) { fcount += uvm_pmr_get1page(count - fcount, memtype_init, - result, start, end); + result, start, end, 0); /* * If we found sufficient pages, go to the succes exit code. 
@@ -1036,6 +1037,8 @@ out: if (found->pg_flags & PG_ZERO) { uvmexp.zeropages--; + if (uvmexp.zeropages < UVM_PAGEZERO_TARGET) + wakeup(&uvmexp.zeropages); } if (flags & UVM_PLA_ZERO) { if (found->pg_flags & PG_ZERO) @@ -1130,6 +1133,8 @@ uvm_pmr_freepages(struct vm_page *pg, ps pg += pmr_count; } wakeup(&uvmexp.free); + if (uvmexp.zeropages < UVM_PAGEZERO_TARGET) + wakeup(&uvmexp.zeropages); uvm_wakeup_pla(VM_PAGE_TO_PHYS(firstpg), ptoa(count)); @@ -1167,6 +1172,8 @@ uvm_pmr_freepageq(struct pglist *pgl) uvm_wakeup_pla(pstart, ptoa(plen)); } wakeup(&uvmexp.free); + if (uvmexp.zeropages < UVM_PAGEZERO_TARGET) + wakeup(&uvmexp.zeropages); uvm_unlock_fpageq(); return; @@ -1663,7 +1670,7 @@ uvm_pmr_rootupdate(struct uvm_pmemrange */ int uvm_pmr_get1page(psize_t count, int memtype_init, struct pglist *result, - paddr_t start, paddr_t end) + paddr_t start, paddr_t end, int memtype_only) { struct uvm_pmemrange *pmr; struct vm_page *found, *splitpg; @@ -1779,6 +1786,8 @@ uvm_pmr_get1page(psize_t count, int memt uvm_pmr_remove_addr(pmr, found); uvm_pmr_assertvalid(pmr); } else { + if (memtype_only) + break; /* * Skip to the next memtype. */ @@ -1941,5 +1950,42 @@ uvm_wakeup_pla(paddr_t low, psize_t len) wakeup(pma); } } + } +} + +void +uvm_pagezero_thread(void *arg) +{ + struct pglist pgl; + struct vm_page *pg; + int count; + + /* Run at the lowest possible priority. */ + curproc->p_p->ps_nice = NZERO + PRIO_MAX; + + KERNEL_UNLOCK(); + + for (;;) { + uvm_lock_fpageq(); + while (uvmexp.zeropages >= UVM_PAGEZERO_TARGET || + (count = uvm_pmr_get1page(16, UVM_PMR_MEMTYPE_DIRTY, + &pgl, 0, 0, 1)) == 0) { + msleep(&uvmexp.zeropages, &uvm.fpageqlock, MAXPRI, + "pgzero", 0); + } + uvm_unlock_fpageq(); + + TAILQ_FOREACH(pg, &pgl, pageq) { + uvm_pagezero(pg); + atomic_setbits_int(&pg->pg_flags, PG_ZERO); + } + + uvm_lock_fpageq(); + while (!TAILQ_EMPTY(&pgl)) + uvm_pmr_remove_1strange(&pgl, 0, NULL, 0); + uvmexp.zeropages += count; + uvm_unlock_fpageq(); + + yield(); } }