> Date: Mon, 21 Dec 2020 16:46:32 -0300 > From: Martin Pieuchot <m...@openbsd.org> > > During a page fault multiple counters are updated. They fall into two > categories "fault counters" and "global statistics" both of which are > currently represented by int-sized fields inside a global: `uvmexp'. > > Diff below makes use of the per-CPU counters_inc(9) API to make sure no > update is lost with an unlocked fault handler. I only converted the > fields touched by uvm_fault() to have a working solution and start a > discussion. > > - Should we keep a single enum for all fields inside `uvmexp' or do we > want to separate "statistics counters" which are mostly used by sys/arch > from "fault counters" which are only used in uvm/uvm_fault.c? > > - The counter_add(9) API deals with uint64_t and currently uvmexp uses > int. Should we truncate or change the size of uvmexp fields or do > something else? > > Comments?
I think this breaks "show uvmexp" in ddb. You fear that using atomic operations for these counters would lead to too much bus contention on systems with a large number of CPUs? > Index: kern/init_main.c > =================================================================== > RCS file: /cvs/src/sys/kern/init_main.c,v > retrieving revision 1.302 > diff -u -p -r1.302 init_main.c > --- kern/init_main.c 7 Dec 2020 16:55:28 -0000 1.302 > +++ kern/init_main.c 21 Dec 2020 19:37:13 -0000 > @@ -432,6 +432,7 @@ main(void *framep) > #endif > > mbcpuinit(); /* enable per cpu mbuf data */ > + uvm_init_percpu(); > > /* init exec and emul */ > init_exec(); > Index: uvm/uvm_extern.h > =================================================================== > RCS file: /cvs/src/sys/uvm/uvm_extern.h,v > retrieving revision 1.155 > diff -u -p -r1.155 uvm_extern.h > --- uvm/uvm_extern.h 1 Dec 2020 13:56:22 -0000 1.155 > +++ uvm/uvm_extern.h 21 Dec 2020 19:37:13 -0000 > @@ -289,6 +289,7 @@ void uvm_vsunlock_device(struct proc > * > void *); > void uvm_pause(void); > void uvm_init(void); > +void uvm_init_percpu(void); > int uvm_io(vm_map_t, struct uio *, int); > > #define UVM_IO_FIXPROT 0x01 > Index: uvm/uvm_fault.c > =================================================================== > RCS file: /cvs/src/sys/uvm/uvm_fault.c,v > retrieving revision 1.109 > diff -u -p -r1.109 uvm_fault.c > --- uvm/uvm_fault.c 8 Dec 2020 12:26:31 -0000 1.109 > +++ uvm/uvm_fault.c 21 Dec 2020 19:37:13 -0000 > @@ -35,6 +35,7 @@ > #include <sys/param.h> > #include <sys/systm.h> > #include <sys/kernel.h> > +#include <sys/percpu.h> > #include <sys/proc.h> > #include <sys/malloc.h> > #include <sys/mman.h> > @@ -271,7 +272,7 @@ uvmfault_anonget(struct uvm_faultinfo *u > int result; > > result = 0; /* XXX shut up gcc */ > - uvmexp.fltanget++; > + counters_inc(uvmexp_counters, flt_anget); > /* bump rusage counters */ > if (anon->an_page) > curproc->p_ru.ru_minflt++; > @@ -295,7 +296,7 @@ uvmfault_anonget(struct 
uvm_faultinfo *u > if ((pg->pg_flags & (PG_BUSY|PG_RELEASED)) == 0) > return (VM_PAGER_OK); > atomic_setbits_int(&pg->pg_flags, PG_WANTED); > - uvmexp.fltpgwait++; > + counters_inc(uvmexp_counters, flt_pgwait); > > /* > * the last unlock must be an atomic unlock+wait on > @@ -310,7 +311,7 @@ uvmfault_anonget(struct uvm_faultinfo *u > > if (pg == NULL) { /* out of RAM. */ > uvmfault_unlockall(ufi, amap, NULL); > - uvmexp.fltnoram++; > + counters_inc(uvmexp_counters, flt_noram); > uvm_wait("flt_noram1"); > /* ready to relock and try again */ > } else { > @@ -325,7 +326,7 @@ uvmfault_anonget(struct uvm_faultinfo *u > * it is ok to read an_swslot here because > * we hold PG_BUSY on the page. > */ > - uvmexp.pageins++; > + counters_inc(uvmexp_counters, pageins); > result = uvm_swap_get(pg, anon->an_swslot, > PGO_SYNCIO); > > @@ -369,7 +370,7 @@ uvmfault_anonget(struct uvm_faultinfo *u > uvm_anfree(anon); /* frees page for us */ > if (locked) > uvmfault_unlockall(ufi, amap, NULL); > - uvmexp.fltpgrele++; > + counters_inc(uvmexp_counters, flt_pgrele); > return (VM_PAGER_REFAULT); /* refault! */ > } > > @@ -426,7 +427,7 @@ uvmfault_anonget(struct uvm_faultinfo *u > } > > /* try it again! 
*/ > - uvmexp.fltanretry++; > + counters_inc(uvmexp_counters, flt_anretry); > continue; > > } /* while (1) */ > @@ -547,7 +548,7 @@ uvm_fault_check(struct uvm_faultinfo *uf > /* need to clear */ > uvmfault_unlockmaps(ufi, FALSE); > uvmfault_amapcopy(ufi); > - uvmexp.fltamcopy++; > + counters_inc(uvmexp_counters, flt_amcopy); > return (ERESTART); > } else { > /* > @@ -699,7 +700,7 @@ uvm_fault_upper(struct uvm_faultinfo *uf > */ > > if ((access_type & PROT_WRITE) != 0 && anon->an_ref > 1) { > - uvmexp.flt_acow++; > + counters_inc(uvmexp_counters, flt_acow); > oanon = anon; /* oanon = old */ > anon = uvm_analloc(); > if (anon) { > @@ -710,10 +711,10 @@ uvm_fault_upper(struct uvm_faultinfo *uf > if (anon == NULL || pg == NULL) { > uvmfault_unlockall(ufi, amap, NULL); > if (anon == NULL) > - uvmexp.fltnoanon++; > + counters_inc(uvmexp_counters, flt_noanon); > else { > uvm_anfree(anon); > - uvmexp.fltnoram++; > + counters_inc(uvmexp_counters, flt_noram); > } > > if (uvm_swapisfull()) > @@ -745,7 +746,7 @@ uvm_fault_upper(struct uvm_faultinfo *uf > * thus, no one can get at it until we are done with it. > */ > } else { > - uvmexp.flt_anon++; > + counters_inc(uvmexp_counters, flt_anon); > oanon = anon; > pg = anon->an_page; > if (anon->an_ref > 1) /* disallow writes to ref > 1 anons */ > @@ -861,7 +862,7 @@ uvm_fault_upper_lookup(struct uvm_faulti > uvm_lock_pageq(); > uvm_pageactivate(anon->an_page); /* reactivate */ > uvm_unlock_pageq(); > - uvmexp.fltnamap++; > + counters_inc(uvmexp_counters, flt_namap); > > /* > * Since this isn't the page that's actually faulting, > @@ -909,7 +910,7 @@ uvm_fault(vm_map_t orig_map, vaddr_t vad > struct vm_page *pages[UVM_MAXRANGE]; > int error = ERESTART; > > - uvmexp.faults++; /* XXX: locking? 
*/ > + counters_inc(uvmexp_counters, faults); > TRACEPOINT(uvm, fault, vaddr, fault_type, access_type, NULL); > > /* init the IN parameters in the ufi */ > @@ -994,7 +995,7 @@ uvm_fault_lower(struct uvm_faultinfo *uf > * ("get" has the option of doing a pmap_enter for us) > */ > if (uobj != NULL) { > - uvmexp.fltlget++; > + counters_inc(uvmexp_counters, flt_lget); > gotpages = flt->npages; > (void) uobj->pgops->pgo_get(uobj, ufi->entry->offset + > (flt->startva - ufi->entry->start), > @@ -1038,7 +1039,7 @@ uvm_fault_lower(struct uvm_faultinfo *uf > uvm_lock_pageq(); > uvm_pageactivate(pages[lcv]); /* reactivate */ > uvm_unlock_pageq(); > - uvmexp.fltnomap++; > + counters_inc(uvmexp_counters, flt_nomap); > > /* > * Since this page isn't the page that's > @@ -1109,7 +1110,7 @@ uvm_fault_lower(struct uvm_faultinfo *uf > > uvmfault_unlockall(ufi, amap, NULL); > > - uvmexp.fltget++; > + counters_inc(uvmexp_counters, flt_get); > gotpages = 1; > uoff = (ufi->orig_rvaddr - ufi->entry->start) + > ufi->entry->offset; > result = uobj->pgops->pgo_get(uobj, uoff, &uobjpage, &gotpages, > @@ -1183,7 +1184,7 @@ uvm_fault_lower(struct uvm_faultinfo *uf > * > * set "pg" to the page we want to map in (uobjpage, usually) > */ > - uvmexp.flt_obj++; > + counters_inc(uvmexp_counters, flt_obj); > if (UVM_ET_ISCOPYONWRITE(ufi->entry)) > flt->enter_prot &= ~PROT_WRITE; > pg = uobjpage; /* map in the actual object */ > @@ -1235,10 +1236,10 @@ uvm_fault_lower(struct uvm_faultinfo *uf > /* unlock and fail ... 
*/ > uvmfault_unlockall(ufi, amap, uobj); > if (anon == NULL) > - uvmexp.fltnoanon++; > + counters_inc(uvmexp_counters, flt_noanon); > else { > uvm_anfree(anon); > - uvmexp.fltnoram++; > + counters_inc(uvmexp_counters, flt_noram); > } > > if (uvm_swapisfull()) > @@ -1254,7 +1255,7 @@ uvm_fault_lower(struct uvm_faultinfo *uf > > /* fill in the data */ > if (uobjpage != PGO_DONTCARE) { > - uvmexp.flt_prcopy++; > + counters_inc(uvmexp_counters, flt_prcopy); > /* copy page [pg now dirty] */ > uvm_pagecopy(uobjpage, pg); > > @@ -1277,7 +1278,7 @@ uvm_fault_lower(struct uvm_faultinfo *uf > uvm_unlock_pageq(); > uobj = NULL; > } else { > - uvmexp.flt_przero++; > + counters_inc(uvmexp_counters, flt_przero); > /* > * Page is zero'd and marked dirty by uvm_pagealloc() > * above. > @@ -1288,7 +1289,7 @@ uvm_fault_lower(struct uvm_faultinfo *uf > ufi->orig_rvaddr - ufi->entry->start, anon, 0)) { > uvmfault_unlockall(ufi, amap, NULL); > uvm_anfree(anon); > - uvmexp.fltnoamap++; > + counters_inc(uvmexp_counters, flt_noamap); > > if (uvm_swapisfull()) > return (ENOMEM); > @@ -1580,7 +1581,7 @@ uvmfault_relock(struct uvm_faultinfo *uf > return TRUE; > } > > - uvmexp.fltrelck++; > + counters_inc(uvmexp_counters, flt_relck); > > /* > * relock map. fail if version mismatch (in which case nothing > @@ -1592,6 +1593,6 @@ uvmfault_relock(struct uvm_faultinfo *uf > return(FALSE); > } > > - uvmexp.fltrelckok++; > + counters_inc(uvmexp_counters, flt_relckok); > return(TRUE); /* got it! 
*/ > } > Index: uvm/uvm_init.c > =================================================================== > RCS file: /cvs/src/sys/uvm/uvm_init.c,v > retrieving revision 1.40 > diff -u -p -r1.40 uvm_init.c > --- uvm/uvm_init.c 11 May 2017 00:42:05 -0000 1.40 > +++ uvm/uvm_init.c 21 Dec 2020 19:37:13 -0000 > @@ -35,6 +35,7 @@ > #include <sys/param.h> > #include <sys/systm.h> > #include <sys/filedesc.h> > +#include <sys/percpu.h> > #include <sys/resourcevar.h> > #include <sys/mman.h> > #include <sys/malloc.h> > @@ -52,6 +53,9 @@ > struct uvm uvm; /* decl */ > struct uvmexp uvmexp; /* decl */ > > +COUNTERS_BOOT_MEMORY(uvmexp_countersboot, exp_ncounters); > +struct cpumem *uvmexp_counters = > COUNTERS_BOOT_INITIALIZER(uvmexp_countersboot); > + > #if defined(VM_MIN_KERNEL_ADDRESS) > vaddr_t vm_min_kernel_address = VM_MIN_KERNEL_ADDRESS; > #else > @@ -184,4 +188,10 @@ uvm_init(void) > uaddr_bestfit_create(vm_map_min(kmem_map), > vm_map_max(kmem_map))); > #endif /* !SMALL_KERNEL */ > +} > + > +void > +uvm_init_percpu(void) > +{ > + uvmexp_counters = counters_alloc_ncpus(uvmexp_counters, exp_ncounters); > } > Index: uvm/uvm_meter.c > =================================================================== > RCS file: /cvs/src/sys/uvm/uvm_meter.c,v > retrieving revision 1.41 > diff -u -p -r1.41 uvm_meter.c > --- uvm/uvm_meter.c 24 Jun 2020 22:03:45 -0000 1.41 > +++ uvm/uvm_meter.c 21 Dec 2020 19:37:13 -0000 > @@ -39,6 +39,7 @@ > #include <sys/param.h> > #include <sys/systm.h> > #include <sys/kernel.h> > +#include <sys/percpu.h> > #include <sys/proc.h> > #include <sys/sysctl.h> > #include <sys/vmmeter.h> > @@ -178,9 +179,42 @@ uvm_sysctl(int *name, u_int namelen, voi > return (sysctl_rdstruct(oldp, oldlenp, newp, &vmtotals, > sizeof(vmtotals))); > > - case VM_UVMEXP: > - return (sysctl_rdstruct(oldp, oldlenp, newp, &uvmexp, > - sizeof(uvmexp))); > + case VM_UVMEXP: { > + struct uvmexp uexp; > + uint64_t counters[exp_ncounters]; > + > + memcpy(&uexp, &uvmexp, sizeof(uexp)); > + > + 
counters_read(uvmexp_counters, counters, exp_ncounters); > + > + /* stat counters */ > + uexp.faults = (int)counters[faults]; > + uexp.pageins = (int)counters[pageins]; > + > + /* fault subcounters */ > + uexp.fltnoram = (int)counters[flt_noram]; > + uexp.fltnoanon = (int)counters[flt_noanon]; > + uexp.fltnoamap = (int)counters[flt_noamap]; > + uexp.fltpgwait = (int)counters[flt_pgwait]; > + uexp.fltpgrele = (int)counters[flt_pgrele]; > + uexp.fltrelck = (int)counters[flt_relck]; > + uexp.fltrelckok = (int)counters[flt_relckok]; > + uexp.fltanget = (int)counters[flt_anget]; > + uexp.fltanretry = (int)counters[flt_anretry]; > + uexp.fltamcopy = (int)counters[flt_amcopy]; > + uexp.fltnamap = (int)counters[flt_namap]; > + uexp.fltnomap = (int)counters[flt_nomap]; > + uexp.fltlget = (int)counters[flt_lget]; > + uexp.fltget = (int)counters[flt_get]; > + uexp.flt_anon = (int)counters[flt_anon]; > + uexp.flt_acow = (int)counters[flt_acow]; > + uexp.flt_obj = (int)counters[flt_obj]; > + uexp.flt_prcopy = (int)counters[flt_prcopy]; > + uexp.flt_przero = (int)counters[flt_przero]; > + > + return (sysctl_rdstruct(oldp, oldlenp, newp, &uexp, > + sizeof(uexp))); > + } > > case VM_NKMEMPAGES: > return (sysctl_rdint(oldp, oldlenp, newp, nkmempages)); > Index: uvm/uvmexp.h > =================================================================== > RCS file: /cvs/src/sys/uvm/uvmexp.h,v > retrieving revision 1.7 > diff -u -p -r1.7 uvmexp.h > --- uvm/uvmexp.h 14 Dec 2020 13:29:18 -0000 1.7 > +++ uvm/uvmexp.h 21 Dec 2020 19:37:13 -0000 > @@ -156,4 +156,41 @@ struct _ps_strings { > void *val; > }; > > +#ifdef _KERNEL > + > +/* > + * Per-cpu UVM counters. 
> + */ > +extern struct cpumem *uvmexp_counters; > + > +enum uvm_exp_counters { > + /* stat counters */ > + faults, /* page fault count */ > + pageins, /* pagein operation count */ > + > + /* fault subcounters */ > + flt_noram, /* number of times fault was out of ram */ > + flt_noanon, /* number of times fault was out of anons */ > + flt_noamap, /* number of times fault was out of amap chunks */ > + flt_pgwait, /* number of times fault had to wait on a page */ > + flt_pgrele, /* number of times fault found a released page */ > + flt_relck, /* number of times fault relock called */ > + flt_relckok, /* number of times fault relock is a success */ > + flt_anget, /* number of times fault gets anon page */ > + flt_anretry, /* number of times fault retrys an anon get */ > + flt_amcopy, /* number of times fault clears "needs copy" */ > + flt_namap, /* number of times fault maps a neighbor anon page */ > + flt_nomap, /* number of times fault maps a neighbor obj page */ > + flt_lget, /* number of times fault does a locked pgo_get */ > + flt_get, /* number of times fault does an unlocked get */ > + flt_anon, /* number of times fault anon (case 1a) */ > + flt_acow, /* number of times fault anon cow (case 1b) */ > + flt_obj, /* number of times fault is on object page (2a) */ > + flt_prcopy, /* number of times fault promotes with copy (2b) */ > + flt_przero, /* number of times fault promotes with zerofill (2b) */ > + > + exp_ncounters > +}; > + > +#endif /* _KERNEL */ > #endif /*_UVM_UVMEXP_ */ > >