Moving this to tech@ Hi,
I was studying the scheduler code after watching tedu's talk, and I'd like to expose these statistics to userland so that I can try playing with cache affinity in the future: gimli:src: sysctl kern.schedstat kern.schedstat.nmigrations=23744 kern.schedstat.noidle=0 kern.schedstat.stolen=9170 kern.schedstat.choose=834843 kern.schedstat.wasidle=808711 kern.schedstat.nomigrations=2388 Opinions? Index: sys/sys//sched.h =================================================================== RCS file: /cvs/src/sys/sys/sched.h,v retrieving revision 1.29 diff -d -u -p -w -r1.29 sched.h --- sys/sys//sched.h 7 Jul 2011 18:00:33 -0000 1.29 +++ sys/sys//sched.h 12 Nov 2011 13:51:04 -0000 @@ -75,6 +75,34 @@ * Posix defines a <sched.h> which may want to include <sys/sched.h> */ +struct schedstat { + u_int64_t scs_nmigrations; + u_int64_t scs_noidle; + u_int64_t scs_stolen; + + u_int64_t scs_choose; + u_int64_t scs_wasidle; + u_int64_t scs_nomigrations; +}; + +/* These sysctl names are only really used by sysctl(8) */ +#define KERN_SCHEDSTAT_NMIGRATIONS 1 +#define KERN_SCHEDSTAT_NOIDLE 2 +#define KERN_SCHEDSTAT_STOLEN 3 +#define KERN_SCHEDSTAT_CHOOSE 4 +#define KERN_SCHEDSTAT_WASIDLE 5 +#define KERN_SCHEDSTAT_NOMIGRATIONS 6 +#define KERN_SCHEDSTAT_MAXID 7 + +#define CTL_KERN_SCHEDSTAT_NAMES { \ + { 0, 0 }, \ + { "nmigrations", CTLTYPE_QUAD }, \ + { "noidle", CTLTYPE_QUAD }, \ + { "stolen", CTLTYPE_QUAD }, \ + { "choose", CTLTYPE_QUAD }, \ + { "wasidle", CTLTYPE_QUAD }, \ + { "nomigrations", CTLTYPE_QUAD } \ +} /* * CPU states. 
* XXX Not really scheduler state, but no other good place to put Index: sys/sys//sysctl.h =================================================================== RCS file: /cvs/src/sys/sys/sysctl.h,v retrieving revision 1.117 diff -d -u -p -w -r1.117 sysctl.h --- sys/sys//sysctl.h 30 Aug 2011 01:09:29 -0000 1.117 +++ sys/sys//sysctl.h 12 Nov 2011 13:40:45 -0000 @@ -189,7 +189,8 @@ struct ctlname { #define KERN_CONSDEV 75 /* dev_t: console terminal device */ #define KERN_NETLIVELOCKS 76 /* int: number of network livelocks */ #define KERN_POOL_DEBUG 77 /* int: enable pool_debug */ -#define KERN_MAXID 78 /* number of valid kern ids */ +#define KERN_SCHEDSTAT 78 /* struct: sched statistics */ +#define KERN_MAXID 79 /* number of valid kern ids */ #define CTL_KERN_NAMES { \ { 0, 0 }, \ @@ -270,6 +271,7 @@ struct ctlname { { "consdev", CTLTYPE_STRUCT }, \ { "netlivelocks", CTLTYPE_INT }, \ { "pool_debug", CTLTYPE_INT }, \ + { "schedstat", CTLTYPE_STRUCT }, \ } /* Index: sys/kern//kern_sched.c =================================================================== RCS file: /cvs/src/sys/kern/kern_sched.c,v retrieving revision 1.24 diff -d -u -p -w -r1.24 kern_sched.c --- sys/kern//kern_sched.c 12 Oct 2011 18:30:09 -0000 1.24 +++ sys/kern//kern_sched.c 12 Nov 2011 14:41:59 -0000 @@ -35,6 +35,8 @@ void sched_kthreads_create(void *); int sched_proc_to_cpu_cost(struct cpu_info *ci, struct proc *p); struct proc *sched_steal_proc(struct cpu_info *); +struct schedstat schedstat; + /* * To help choosing which cpu should run which process we keep track * of cpus which are currently idle and which cpus have processes @@ -301,14 +303,6 @@ again: return (p); } -uint64_t sched_nmigrations; -uint64_t sched_noidle; -uint64_t sched_stolen; - -uint64_t sched_choose; -uint64_t sched_wasidle; -uint64_t sched_nomigrations; - struct cpu_info * sched_choosecpu_fork(struct proc *parent, int flags) { @@ -374,7 +368,7 @@ sched_choosecpu(struct proc *p) if (p->p_flag & P_CPUPEG) return (p->p_cpu); - 
sched_choose++; + schedstat.scs_choose++; /* * Look at all cpus that are currently idle and have nothing queued. @@ -393,7 +387,7 @@ sched_choosecpu(struct proc *p) if (cpuset_isset(&set, p->p_cpu) || (p->p_cpu == curcpu() && p->p_cpu->ci_schedstate.spc_nrun == 0 && curproc == p)) { - sched_wasidle++; + schedstat.scs_wasidle++; return (p->p_cpu); } @@ -411,9 +405,9 @@ sched_choosecpu(struct proc *p) } if (p->p_cpu != choice) - sched_nmigrations++; + schedstat.scs_nmigrations++; else - sched_nomigrations++; + schedstat.scs_nomigrations++; return (choice); } @@ -461,7 +455,7 @@ sched_steal_proc(struct cpu_info *self) remrunqueue(best); best->p_cpu = self; - sched_stolen++; + schedstat.scs_stolen++; return (best); } Index: sys/kern//kern_sysctl.c =================================================================== RCS file: /cvs/src/sys/kern/kern_sysctl.c,v retrieving revision 1.208 diff -d -u -p -w -r1.208 kern_sysctl.c --- sys/kern//kern_sysctl.c 18 Sep 2011 13:23:38 -0000 1.208 +++ sys/kern//kern_sysctl.c 12 Nov 2011 15:04:52 -0000 @@ -112,6 +112,7 @@ extern struct disklist_head disklist; extern fixpt_t ccpu; extern long numvnodes; extern u_int mcllivelocks; +extern struct schedstat schedstat; extern void nmbclust_update(void); @@ -602,6 +603,9 @@ kern_sysctl(int *name, u_int namelen, vo pool_reclaim_all(); return (error); } + case KERN_SCHEDSTAT: + return (sysctl_rdstruct(oldp, oldlenp, newp, &schedstat, + sizeof(struct schedstat))); default: return (EOPNOTSUPP); } Index: sbin/sysctl//sysctl.c =================================================================== RCS file: /cvs/src/sbin/sysctl/sysctl.c,v retrieving revision 1.180 diff -d -u -p -w -r1.180 sysctl.c --- sbin/sysctl//sysctl.c 16 Sep 2011 20:52:48 -0000 1.180 +++ sbin/sysctl//sysctl.c 12 Nov 2011 14:22:19 -0000 @@ -119,6 +119,7 @@ struct ctlname username[] = CTL_USER_NAM struct ctlname debugname[CTL_DEBUG_MAXID]; struct ctlname kernmallocname[] = CTL_KERN_MALLOC_NAMES; struct ctlname forkstatname[] = 
CTL_KERN_FORKSTAT_NAMES; +struct ctlname schedstatname[] = CTL_KERN_SCHEDSTAT_NAMES; struct ctlname nchstatsname[] = CTL_KERN_NCHSTATS_NAMES; struct ctlname ttysname[] = CTL_KERN_TTY_NAMES; struct ctlname semname[] = CTL_KERN_SEMINFO_NAMES; @@ -198,6 +199,7 @@ static int sysctl_vfsgen(char *, char ** int sysctl_bios(char *, char **, int *, int, int *); int sysctl_swpenc(char *, char **, int *, int, int *); int sysctl_forkstat(char *, char **, int *, int, int *); +int sysctl_schedstat(char *, char **, int *, int, int *); int sysctl_tty(char *, char **, int *, int, int *); int sysctl_nchstats(char *, char **, int *, int, int *); int sysctl_malloc(char *, char **, int *, int, int *); @@ -450,6 +452,9 @@ parse(char *string, int flags) case KERN_NETLIVELOCKS: special |= UNSIGNED; break; + case KERN_SCHEDSTAT: + sysctl_schedstat(string, &bufp, mib, flags, &type); + return; } break; @@ -1622,6 +1627,7 @@ struct list pipexlist = { pipexname, PIP struct list kernmalloclist = { kernmallocname, KERN_MALLOC_MAXID }; struct list forkstatlist = { forkstatname, KERN_FORKSTAT_MAXID }; +struct list schedstatlist = { schedstatname, KERN_SCHEDSTAT_MAXID }; struct list nchstatslist = { nchstatsname, KERN_NCHSTATS_MAXID }; struct list ttylist = { ttysname, KERN_TTY_MAXID }; struct list semlist = { semname, KERN_SEMINFO_MAXID }; @@ -1891,6 +1897,62 @@ sysctl_malloc(char *string, char **bufpp *typep = CTLTYPE_STRING; return (3); } + return (-1); +} + +/* + * handle scheduler statistics + */ +int +sysctl_schedstat(char *string, char **bufpp, int mib[], int flags, int *typep) +{ + static struct schedstat scs; + static int keepvalue = 0; + int indx; + size_t size; + + if (*bufpp == NULL) { + bzero(&scs, sizeof(scs)); + listall(string, &schedstatlist); + return (-1); + } + if ((indx = findname(string, "third", bufpp, &schedstatlist)) == -1) + return (-1); + if (*bufpp != NULL) { + warnx("fourth level name in %s is invalid", string); + return (-1); + } + if (keepvalue == 0) { + size = 
sizeof(scs); + if (sysctl(mib, 2, &scs, &size, NULL, 0) < 0) + return (-1); + keepvalue = 1; + } + if (!nflag) + (void)printf("%s%s", string, equ); + switch (indx) { + case KERN_SCHEDSTAT_NMIGRATIONS: + (void)printf("%llu\n", scs.scs_nmigrations); + break; + case KERN_SCHEDSTAT_NOIDLE: + (void)printf("%llu\n", scs.scs_noidle); + break; + case KERN_SCHEDSTAT_STOLEN: + (void)printf("%llu\n", scs.scs_stolen); + break; + case KERN_SCHEDSTAT_CHOOSE: + (void)printf("%llu\n", scs.scs_choose); + break; + case KERN_SCHEDSTAT_WASIDLE: + (void)printf("%llu\n", scs.scs_wasidle); + break; + case KERN_SCHEDSTAT_NOMIGRATIONS: + (void)printf("%llu\n", scs.scs_nomigrations); + break; + default: + break; + } + return (-1); }