> Date: Mon, 21 Dec 2020 16:46:32 -0300
> From: Martin Pieuchot <m...@openbsd.org>
> 
> During a page fault multiple counters are updated.  They fall into two
> categories, "fault counters" and "global statistics", both of which are
> currently represented by int-sized fields inside a global: `uvmexp'.
> 
> Diff below makes use of the per-CPU counters_inc(9) API to make sure no
> update is lost with an unlocked fault handler.  I only converted the
> fields touched by uvm_fault() to have a working solution and start a
> discussion.
> 
> - Should we keep a single enum for all fields inside `uvmexp' or do we
>   want to separate "statistics counters" which are mostly used sys/arch
>   from "fault counters" which are only used in uvm/uvm_fault.c?
> 
> - The counter_add(9) API deals with uint64_t and currently uvmexp uses
>   int.  Should we truncate or change the size of uvmexp fields or do
>   something else?
> 
> Comments?

I think this breaks "show uvmexp" in ddb.

Do you fear that using atomic operations for these counters would lead
to too much bus contention on systems with a large number of CPUs?

> Index: kern/init_main.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/init_main.c,v
> retrieving revision 1.302
> diff -u -p -r1.302 init_main.c
> --- kern/init_main.c  7 Dec 2020 16:55:28 -0000       1.302
> +++ kern/init_main.c  21 Dec 2020 19:37:13 -0000
> @@ -432,6 +432,7 @@ main(void *framep)
>  #endif
>  
>       mbcpuinit();    /* enable per cpu mbuf data */
> +     uvm_init_percpu();
>  
>       /* init exec and emul */
>       init_exec();
> Index: uvm/uvm_extern.h
> ===================================================================
> RCS file: /cvs/src/sys/uvm/uvm_extern.h,v
> retrieving revision 1.155
> diff -u -p -r1.155 uvm_extern.h
> --- uvm/uvm_extern.h  1 Dec 2020 13:56:22 -0000       1.155
> +++ uvm/uvm_extern.h  21 Dec 2020 19:37:13 -0000
> @@ -289,6 +289,7 @@ void                      uvm_vsunlock_device(struct proc 
> *
>                           void *);
>  void                 uvm_pause(void);
>  void                 uvm_init(void); 
> +void                 uvm_init_percpu(void);
>  int                  uvm_io(vm_map_t, struct uio *, int);
>  
>  #define      UVM_IO_FIXPROT  0x01
> Index: uvm/uvm_fault.c
> ===================================================================
> RCS file: /cvs/src/sys/uvm/uvm_fault.c,v
> retrieving revision 1.109
> diff -u -p -r1.109 uvm_fault.c
> --- uvm/uvm_fault.c   8 Dec 2020 12:26:31 -0000       1.109
> +++ uvm/uvm_fault.c   21 Dec 2020 19:37:13 -0000
> @@ -35,6 +35,7 @@
>  #include <sys/param.h>
>  #include <sys/systm.h>
>  #include <sys/kernel.h>
> +#include <sys/percpu.h>
>  #include <sys/proc.h>
>  #include <sys/malloc.h>
>  #include <sys/mman.h>
> @@ -271,7 +272,7 @@ uvmfault_anonget(struct uvm_faultinfo *u
>       int result;
>  
>       result = 0;             /* XXX shut up gcc */
> -     uvmexp.fltanget++;
> +     counters_inc(uvmexp_counters, flt_anget);
>          /* bump rusage counters */
>       if (anon->an_page)
>               curproc->p_ru.ru_minflt++;
> @@ -295,7 +296,7 @@ uvmfault_anonget(struct uvm_faultinfo *u
>                       if ((pg->pg_flags & (PG_BUSY|PG_RELEASED)) == 0)
>                               return (VM_PAGER_OK);
>                       atomic_setbits_int(&pg->pg_flags, PG_WANTED);
> -                     uvmexp.fltpgwait++;
> +                     counters_inc(uvmexp_counters, flt_pgwait);
>  
>                       /*
>                        * the last unlock must be an atomic unlock+wait on
> @@ -310,7 +311,7 @@ uvmfault_anonget(struct uvm_faultinfo *u
>  
>                       if (pg == NULL) {               /* out of RAM.  */
>                               uvmfault_unlockall(ufi, amap, NULL);
> -                             uvmexp.fltnoram++;
> +                             counters_inc(uvmexp_counters, flt_noram);
>                               uvm_wait("flt_noram1");
>                               /* ready to relock and try again */
>                       } else {
> @@ -325,7 +326,7 @@ uvmfault_anonget(struct uvm_faultinfo *u
>                                * it is ok to read an_swslot here because
>                                * we hold PG_BUSY on the page.
>                                */
> -                             uvmexp.pageins++;
> +                             counters_inc(uvmexp_counters, pageins);
>                               result = uvm_swap_get(pg, anon->an_swslot,
>                                   PGO_SYNCIO);
>  
> @@ -369,7 +370,7 @@ uvmfault_anonget(struct uvm_faultinfo *u
>                               uvm_anfree(anon);       /* frees page for us */
>                               if (locked)
>                                       uvmfault_unlockall(ufi, amap, NULL);
> -                             uvmexp.fltpgrele++;
> +                             counters_inc(uvmexp_counters, flt_pgrele);
>                               return (VM_PAGER_REFAULT);      /* refault! */
>                       }
>  
> @@ -426,7 +427,7 @@ uvmfault_anonget(struct uvm_faultinfo *u
>               }
>  
>               /* try it again! */
> -             uvmexp.fltanretry++;
> +             counters_inc(uvmexp_counters, flt_anretry);
>               continue;
>  
>       } /* while (1) */
> @@ -547,7 +548,7 @@ uvm_fault_check(struct uvm_faultinfo *uf
>                       /* need to clear */
>                       uvmfault_unlockmaps(ufi, FALSE);
>                       uvmfault_amapcopy(ufi);
> -                     uvmexp.fltamcopy++;
> +                     counters_inc(uvmexp_counters, flt_amcopy);
>                       return (ERESTART);
>               } else {
>                       /*
> @@ -699,7 +700,7 @@ uvm_fault_upper(struct uvm_faultinfo *uf
>        */
>  
>       if ((access_type & PROT_WRITE) != 0 && anon->an_ref > 1) {
> -             uvmexp.flt_acow++;
> +             counters_inc(uvmexp_counters, flt_acow);
>               oanon = anon;           /* oanon = old */
>               anon = uvm_analloc();
>               if (anon) {
> @@ -710,10 +711,10 @@ uvm_fault_upper(struct uvm_faultinfo *uf
>               if (anon == NULL || pg == NULL) {
>                       uvmfault_unlockall(ufi, amap, NULL);
>                       if (anon == NULL)
> -                             uvmexp.fltnoanon++;
> +                             counters_inc(uvmexp_counters, flt_noanon);
>                       else {
>                               uvm_anfree(anon);
> -                             uvmexp.fltnoram++;
> +                             counters_inc(uvmexp_counters, flt_noram);
>                       }
>  
>                       if (uvm_swapisfull())
> @@ -745,7 +746,7 @@ uvm_fault_upper(struct uvm_faultinfo *uf
>                * thus, no one can get at it until we are done with it.
>                */
>       } else {
> -             uvmexp.flt_anon++;
> +             counters_inc(uvmexp_counters, flt_anon);
>               oanon = anon;
>               pg = anon->an_page;
>               if (anon->an_ref > 1)     /* disallow writes to ref > 1 anons */
> @@ -861,7 +862,7 @@ uvm_fault_upper_lookup(struct uvm_faulti
>                       uvm_lock_pageq();
>                       uvm_pageactivate(anon->an_page);        /* reactivate */
>                       uvm_unlock_pageq();
> -                     uvmexp.fltnamap++;
> +                     counters_inc(uvmexp_counters, flt_namap);
>  
>                       /*
>                        * Since this isn't the page that's actually faulting,
> @@ -909,7 +910,7 @@ uvm_fault(vm_map_t orig_map, vaddr_t vad
>       struct vm_page *pages[UVM_MAXRANGE];
>       int error = ERESTART;
>  
> -     uvmexp.faults++;        /* XXX: locking? */
> +     counters_inc(uvmexp_counters, faults);
>       TRACEPOINT(uvm, fault, vaddr, fault_type, access_type, NULL);
>  
>       /* init the IN parameters in the ufi */
> @@ -994,7 +995,7 @@ uvm_fault_lower(struct uvm_faultinfo *uf
>        * ("get" has the option of doing a pmap_enter for us)
>        */
>       if (uobj != NULL) {
> -             uvmexp.fltlget++;
> +             counters_inc(uvmexp_counters, flt_lget);
>               gotpages = flt->npages;
>               (void) uobj->pgops->pgo_get(uobj, ufi->entry->offset +
>                               (flt->startva - ufi->entry->start),
> @@ -1038,7 +1039,7 @@ uvm_fault_lower(struct uvm_faultinfo *uf
>                               uvm_lock_pageq();
>                               uvm_pageactivate(pages[lcv]);   /* reactivate */
>                               uvm_unlock_pageq();
> -                             uvmexp.fltnomap++;
> +                             counters_inc(uvmexp_counters, flt_nomap);
>  
>                               /*
>                                * Since this page isn't the page that's
> @@ -1109,7 +1110,7 @@ uvm_fault_lower(struct uvm_faultinfo *uf
>  
>               uvmfault_unlockall(ufi, amap, NULL);
>  
> -             uvmexp.fltget++;
> +             counters_inc(uvmexp_counters, flt_get);
>               gotpages = 1;
>               uoff = (ufi->orig_rvaddr - ufi->entry->start) + 
> ufi->entry->offset;
>               result = uobj->pgops->pgo_get(uobj, uoff, &uobjpage, &gotpages,
> @@ -1183,7 +1184,7 @@ uvm_fault_lower(struct uvm_faultinfo *uf
>                *
>                * set "pg" to the page we want to map in (uobjpage, usually)
>                */
> -             uvmexp.flt_obj++;
> +             counters_inc(uvmexp_counters, flt_obj);
>               if (UVM_ET_ISCOPYONWRITE(ufi->entry))
>                       flt->enter_prot &= ~PROT_WRITE;
>               pg = uobjpage;          /* map in the actual object */
> @@ -1235,10 +1236,10 @@ uvm_fault_lower(struct uvm_faultinfo *uf
>                       /* unlock and fail ... */
>                       uvmfault_unlockall(ufi, amap, uobj);
>                       if (anon == NULL)
> -                             uvmexp.fltnoanon++;
> +                             counters_inc(uvmexp_counters, flt_noanon);
>                       else {
>                               uvm_anfree(anon);
> -                             uvmexp.fltnoram++;
> +                             counters_inc(uvmexp_counters, flt_noram);
>                       }
>  
>                       if (uvm_swapisfull())
> @@ -1254,7 +1255,7 @@ uvm_fault_lower(struct uvm_faultinfo *uf
>  
>               /* fill in the data */
>               if (uobjpage != PGO_DONTCARE) {
> -                     uvmexp.flt_prcopy++;
> +                     counters_inc(uvmexp_counters, flt_prcopy);
>                       /* copy page [pg now dirty] */
>                       uvm_pagecopy(uobjpage, pg);
>  
> @@ -1277,7 +1278,7 @@ uvm_fault_lower(struct uvm_faultinfo *uf
>                       uvm_unlock_pageq();
>                       uobj = NULL;
>               } else {
> -                     uvmexp.flt_przero++;
> +                     counters_inc(uvmexp_counters, flt_przero);
>                       /*
>                        * Page is zero'd and marked dirty by uvm_pagealloc()
>                        * above.
> @@ -1288,7 +1289,7 @@ uvm_fault_lower(struct uvm_faultinfo *uf
>                   ufi->orig_rvaddr - ufi->entry->start, anon, 0)) {
>                       uvmfault_unlockall(ufi, amap, NULL);
>                       uvm_anfree(anon);
> -                     uvmexp.fltnoamap++;
> +                     counters_inc(uvmexp_counters, flt_noamap);
>  
>                       if (uvm_swapisfull())
>                               return (ENOMEM);
> @@ -1580,7 +1581,7 @@ uvmfault_relock(struct uvm_faultinfo *uf
>               return TRUE;
>       }
>  
> -     uvmexp.fltrelck++;
> +     counters_inc(uvmexp_counters, flt_relck);
>  
>       /*
>        * relock map.   fail if version mismatch (in which case nothing
> @@ -1592,6 +1593,6 @@ uvmfault_relock(struct uvm_faultinfo *uf
>               return(FALSE);
>       }
>  
> -     uvmexp.fltrelckok++;
> +     counters_inc(uvmexp_counters, flt_relckok);
>       return(TRUE);           /* got it! */
>  }
> Index: uvm/uvm_init.c
> ===================================================================
> RCS file: /cvs/src/sys/uvm/uvm_init.c,v
> retrieving revision 1.40
> diff -u -p -r1.40 uvm_init.c
> --- uvm/uvm_init.c    11 May 2017 00:42:05 -0000      1.40
> +++ uvm/uvm_init.c    21 Dec 2020 19:37:13 -0000
> @@ -35,6 +35,7 @@
>  #include <sys/param.h>
>  #include <sys/systm.h>
>  #include <sys/filedesc.h>
> +#include <sys/percpu.h>
>  #include <sys/resourcevar.h>
>  #include <sys/mman.h>
>  #include <sys/malloc.h>
> @@ -52,6 +53,9 @@
>  struct uvm uvm;              /* decl */
>  struct uvmexp uvmexp;        /* decl */
>  
> +COUNTERS_BOOT_MEMORY(uvmexp_countersboot, exp_ncounters);
> +struct cpumem *uvmexp_counters = 
> COUNTERS_BOOT_INITIALIZER(uvmexp_countersboot);
> +
>  #if defined(VM_MIN_KERNEL_ADDRESS)
>  vaddr_t vm_min_kernel_address = VM_MIN_KERNEL_ADDRESS;
>  #else
> @@ -184,4 +188,10 @@ uvm_init(void)
>           uaddr_bestfit_create(vm_map_min(kmem_map),
>           vm_map_max(kmem_map)));
>  #endif /* !SMALL_KERNEL */
> +}
> +
> +void
> +uvm_init_percpu(void)
> +{
> +     uvmexp_counters = counters_alloc_ncpus(uvmexp_counters, exp_ncounters);
>  }
> Index: uvm/uvm_meter.c
> ===================================================================
> RCS file: /cvs/src/sys/uvm/uvm_meter.c,v
> retrieving revision 1.41
> diff -u -p -r1.41 uvm_meter.c
> --- uvm/uvm_meter.c   24 Jun 2020 22:03:45 -0000      1.41
> +++ uvm/uvm_meter.c   21 Dec 2020 19:37:13 -0000
> @@ -39,6 +39,7 @@
>  #include <sys/param.h>
>  #include <sys/systm.h>
>  #include <sys/kernel.h>
> +#include <sys/percpu.h>
>  #include <sys/proc.h>
>  #include <sys/sysctl.h>
>  #include <sys/vmmeter.h>
> @@ -178,9 +179,42 @@ uvm_sysctl(int *name, u_int namelen, voi
>               return (sysctl_rdstruct(oldp, oldlenp, newp, &vmtotals,
>                   sizeof(vmtotals)));
>  
> -     case VM_UVMEXP:
> -             return (sysctl_rdstruct(oldp, oldlenp, newp, &uvmexp,
> -                 sizeof(uvmexp)));
> +     case VM_UVMEXP: {
> +             struct uvmexp uexp;
> +             uint64_t counters[exp_ncounters];
> +
> +             memcpy(&uexp, &uvmexp, sizeof(uexp));
> +
> +             counters_read(uvmexp_counters, counters, exp_ncounters);
> +
> +             /* stat counters */
> +             uexp.faults = (int)counters[faults];
> +             uexp.pageins = (int)counters[pageins];
> +
> +             /* fault subcounters */
> +             uexp.fltnoram = (int)counters[flt_noram];
> +             uexp.fltnoanon = (int)counters[flt_noanon];
> +             uexp.fltnoamap = (int)counters[flt_noamap];
> +             uexp.fltpgwait = (int)counters[flt_pgwait];
> +             uexp.fltpgrele = (int)counters[flt_pgrele];
> +             uexp.fltrelck = (int)counters[flt_relck];
> +             uexp.fltrelckok = (int)counters[flt_relckok];
> +             uexp.fltanget = (int)counters[flt_anget];
> +             uexp.fltanretry = (int)counters[flt_anretry];
> +             uexp.fltamcopy = (int)counters[flt_amcopy];
> +             uexp.fltnamap = (int)counters[flt_namap];
> +             uexp.fltnomap = (int)counters[flt_nomap];
> +             uexp.fltlget = (int)counters[flt_lget];
> +             uexp.fltget = (int)counters[flt_get];
> +             uexp.flt_anon = (int)counters[flt_anon];
> +             uexp.flt_acow = (int)counters[flt_acow];
> +             uexp.flt_obj = (int)counters[flt_obj];
> +             uexp.flt_prcopy = (int)counters[flt_prcopy];
> +             uexp.flt_przero = (int)counters[flt_przero];
> +
> +             return (sysctl_rdstruct(oldp, oldlenp, newp, &uexp,
> +                 sizeof(uexp)));
> +     }
>  
>       case VM_NKMEMPAGES:
>               return (sysctl_rdint(oldp, oldlenp, newp, nkmempages));
> Index: uvm/uvmexp.h
> ===================================================================
> RCS file: /cvs/src/sys/uvm/uvmexp.h,v
> retrieving revision 1.7
> diff -u -p -r1.7 uvmexp.h
> --- uvm/uvmexp.h      14 Dec 2020 13:29:18 -0000      1.7
> +++ uvm/uvmexp.h      21 Dec 2020 19:37:13 -0000
> @@ -156,4 +156,41 @@ struct _ps_strings {
>       void    *val;
>  };
>  
> +#ifdef _KERNEL
> +
> +/*
> + * Per-cpu UVM counters.
> + */
> +extern struct cpumem *uvmexp_counters;
> +
> +enum uvm_exp_counters {
> +     /* stat counters */
> +     faults,         /* page fault count */
> +     pageins,        /* pagein operation count */
> +
> +     /* fault subcounters */
> +     flt_noram,      /* number of times fault was out of ram */
> +     flt_noanon,     /* number of times fault was out of anons */
> +     flt_noamap,     /* number of times fault was out of amap chunks */
> +     flt_pgwait,     /* number of times fault had to wait on a page */
> +     flt_pgrele,     /* number of times fault found a released page */
> +     flt_relck,      /* number of times fault relock called */
> +     flt_relckok,    /* number of times fault relock is a success */
> +     flt_anget,      /* number of times fault gets anon page */
> +     flt_anretry,    /* number of times fault retrys an anon get */
> +     flt_amcopy,     /* number of times fault clears "needs copy" */
> +     flt_namap,      /* number of times fault maps a neighbor anon page */
> +     flt_nomap,      /* number of times fault maps a neighbor obj page */
> +     flt_lget,       /* number of times fault does a locked pgo_get */
> +     flt_get,        /* number of times fault does an unlocked get */
> +     flt_anon,       /* number of times fault anon (case 1a) */
> +     flt_acow,       /* number of times fault anon cow (case 1b) */
> +     flt_obj,        /* number of times fault is on object page (2a) */
> +     flt_prcopy,     /* number of times fault promotes with copy (2b) */
> +     flt_przero,     /* number of times fault promotes with zerofill (2b) */
> +
> +     exp_ncounters
> +};
> +
> +#endif /* _KERNEL */
>  #endif /*_UVM_UVMEXP_ */
> 
> 

Reply via email to