Moving this to tech@

Hi,

I was studying the scheduler code after watching tedu's talk, and I'd like
to expose these statistics to userland so that I can try playing with
cache affinity in the future:

gimli:src: sysctl kern.schedstat
kern.schedstat.nmigrations=23744
kern.schedstat.noidle=0
kern.schedstat.stolen=9170
kern.schedstat.choose=834843
kern.schedstat.wasidle=808711
kern.schedstat.nomigrations=2388

Opinions?

Index: sys/sys//sched.h
===================================================================
RCS file: /cvs/src/sys/sys/sched.h,v
retrieving revision 1.29
diff -d -u -p -w -r1.29 sched.h
--- sys/sys//sched.h    7 Jul 2011 18:00:33 -0000       1.29
+++ sys/sys//sched.h    12 Nov 2011 13:51:04 -0000
@@ -75,6 +75,34 @@
  * Posix defines a <sched.h> which may want to include <sys/sched.h>
  */
 
+struct schedstat {     /* scheduler event counters, returned by the KERN_SCHEDSTAT sysctl */
+       u_int64_t scs_nmigrations;      /* sched_choosecpu() picked a cpu other than p->p_cpu */
+       u_int64_t scs_noidle;           /* NOTE(review): no update site visible in this diff */
+       u_int64_t scs_stolen;           /* procs taken by sched_steal_proc() */
+
+       u_int64_t scs_choose;           /* sched_choosecpu() calls that got past the P_CPUPEG check */
+       u_int64_t scs_wasidle;          /* sched_choosecpu() returned p->p_cpu from its early check */
+       u_int64_t scs_nomigrations;     /* sched_choosecpu() ended up picking p->p_cpu again */
+};
+
+/* These sysctl names are only really used by sysctl(8) */
+#define KERN_SCHEDSTAT_NMIGRATIONS     1
+#define KERN_SCHEDSTAT_NOIDLE          2
+#define KERN_SCHEDSTAT_STOLEN          3
+#define KERN_SCHEDSTAT_CHOOSE          4
+#define KERN_SCHEDSTAT_WASIDLE         5
+#define KERN_SCHEDSTAT_NOMIGRATIONS    6
+#define KERN_SCHEDSTAT_MAXID           7
+
+#define CTL_KERN_SCHEDSTAT_NAMES {             \
+       { 0, 0 },                               \
+       { "nmigrations", CTLTYPE_QUAD },        \
+       { "noidle", CTLTYPE_QUAD },             \
+       { "stolen", CTLTYPE_QUAD },             \
+       { "choose", CTLTYPE_QUAD },             \
+       { "wasidle", CTLTYPE_QUAD },            \
+       { "nomigrations", CTLTYPE_QUAD }        \
+}
 /*
  * CPU states.
  * XXX Not really scheduler state, but no other good place to put
Index: sys/sys//sysctl.h
===================================================================
RCS file: /cvs/src/sys/sys/sysctl.h,v
retrieving revision 1.117
diff -d -u -p -w -r1.117 sysctl.h
--- sys/sys//sysctl.h   30 Aug 2011 01:09:29 -0000      1.117
+++ sys/sys//sysctl.h   12 Nov 2011 13:40:45 -0000
@@ -189,7 +189,8 @@ struct ctlname {
 #define        KERN_CONSDEV            75      /* dev_t: console terminal device */
 #define        KERN_NETLIVELOCKS       76      /* int: number of network livelocks */
 #define        KERN_POOL_DEBUG         77      /* int: enable pool_debug */
-#define        KERN_MAXID              78      /* number of valid kern ids */
+#define KERN_SCHEDSTAT         78      /* struct: sched statistics */
+#define        KERN_MAXID              79      /* number of valid kern ids */
 
 #define        CTL_KERN_NAMES { \
        { 0, 0 }, \
@@ -270,6 +271,7 @@ struct ctlname {
        { "consdev", CTLTYPE_STRUCT }, \
        { "netlivelocks", CTLTYPE_INT }, \
        { "pool_debug", CTLTYPE_INT }, \
+       { "schedstat", CTLTYPE_STRUCT }, \
 }
 
 /*
Index: sys/kern//kern_sched.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_sched.c,v
retrieving revision 1.24
diff -d -u -p -w -r1.24 kern_sched.c
--- sys/kern//kern_sched.c      12 Oct 2011 18:30:09 -0000      1.24
+++ sys/kern//kern_sched.c      12 Nov 2011 14:41:59 -0000
@@ -35,6 +35,8 @@ void sched_kthreads_create(void *);
 int sched_proc_to_cpu_cost(struct cpu_info *ci, struct proc *p);
 struct proc *sched_steal_proc(struct cpu_info *);
 
+struct schedstat schedstat;
+
 /*
  * To help choosing which cpu should run which process we keep track
  * of cpus which are currently idle and which cpus have processes
@@ -301,14 +303,6 @@ again:
        return (p);     
 }
 
-uint64_t sched_nmigrations;
-uint64_t sched_noidle;
-uint64_t sched_stolen;
-
-uint64_t sched_choose;
-uint64_t sched_wasidle;
-uint64_t sched_nomigrations;
-
 struct cpu_info *
 sched_choosecpu_fork(struct proc *parent, int flags)
 {
@@ -374,7 +368,7 @@ sched_choosecpu(struct proc *p)
        if (p->p_flag & P_CPUPEG)
                return (p->p_cpu);
 
-       sched_choose++;
+       schedstat.scs_choose++;
 
        /*
         * Look at all cpus that are currently idle and have nothing queued.
@@ -393,7 +387,7 @@ sched_choosecpu(struct proc *p)
        if (cpuset_isset(&set, p->p_cpu) ||
            (p->p_cpu == curcpu() && p->p_cpu->ci_schedstate.spc_nrun == 0 &&
            curproc == p)) {
-               sched_wasidle++;
+               schedstat.scs_wasidle++;
                return (p->p_cpu);
        }
 
@@ -411,9 +405,9 @@ sched_choosecpu(struct proc *p)
        }
 
        if (p->p_cpu != choice)
-               sched_nmigrations++;
+               schedstat.scs_nmigrations++;
        else
-               sched_nomigrations++;
+               schedstat.scs_nomigrations++;
 
        return (choice);
 }
@@ -461,7 +455,7 @@ sched_steal_proc(struct cpu_info *self)
        remrunqueue(best);
        best->p_cpu = self;
 
-       sched_stolen++;
+       schedstat.scs_stolen++;
 
        return (best);
 }
Index: sys/kern//kern_sysctl.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_sysctl.c,v
retrieving revision 1.208
diff -d -u -p -w -r1.208 kern_sysctl.c
--- sys/kern//kern_sysctl.c     18 Sep 2011 13:23:38 -0000      1.208
+++ sys/kern//kern_sysctl.c     12 Nov 2011 15:04:52 -0000
@@ -112,6 +112,7 @@ extern struct disklist_head disklist;
 extern fixpt_t ccpu;
 extern  long numvnodes;
 extern u_int mcllivelocks;
+extern struct schedstat schedstat;
 
 extern void nmbclust_update(void);
 
@@ -602,6 +603,9 @@ kern_sysctl(int *name, u_int namelen, vo
                        pool_reclaim_all();
                return (error);
        }
+       case KERN_SCHEDSTAT:
+               return (sysctl_rdstruct(oldp, oldlenp, newp, &schedstat,
+                   sizeof(struct schedstat)));
        default:
                return (EOPNOTSUPP);
        }
Index: sbin/sysctl//sysctl.c
===================================================================
RCS file: /cvs/src/sbin/sysctl/sysctl.c,v
retrieving revision 1.180
diff -d -u -p -w -r1.180 sysctl.c
--- sbin/sysctl//sysctl.c       16 Sep 2011 20:52:48 -0000      1.180
+++ sbin/sysctl//sysctl.c       12 Nov 2011 14:22:19 -0000
@@ -119,6 +119,7 @@ struct ctlname username[] = CTL_USER_NAM
 struct ctlname debugname[CTL_DEBUG_MAXID];
 struct ctlname kernmallocname[] = CTL_KERN_MALLOC_NAMES;
 struct ctlname forkstatname[] = CTL_KERN_FORKSTAT_NAMES;
+struct ctlname schedstatname[] = CTL_KERN_SCHEDSTAT_NAMES;
 struct ctlname nchstatsname[] = CTL_KERN_NCHSTATS_NAMES;
 struct ctlname ttysname[] = CTL_KERN_TTY_NAMES;
 struct ctlname semname[] = CTL_KERN_SEMINFO_NAMES;
@@ -198,6 +199,7 @@ static int sysctl_vfsgen(char *, char **
 int sysctl_bios(char *, char **, int *, int, int *);
 int sysctl_swpenc(char *, char **, int *, int, int *);
 int sysctl_forkstat(char *, char **, int *, int, int *);
+int sysctl_schedstat(char *, char **, int *, int, int *);
 int sysctl_tty(char *, char **, int *, int, int *);
 int sysctl_nchstats(char *, char **, int *, int, int *);
 int sysctl_malloc(char *, char **, int *, int, int *);
@@ -450,6 +452,9 @@ parse(char *string, int flags)
                case KERN_NETLIVELOCKS:
                        special |= UNSIGNED;
                        break;
+               case KERN_SCHEDSTAT:
+                       sysctl_schedstat(string, &bufp, mib, flags, &type);
+                       return;
                }
                break;
 
@@ -1622,6 +1627,7 @@ struct list pipexlist = { pipexname, PIP
 
 struct list kernmalloclist = { kernmallocname, KERN_MALLOC_MAXID };
 struct list forkstatlist = { forkstatname, KERN_FORKSTAT_MAXID };
+struct list schedstatlist = { schedstatname, KERN_SCHEDSTAT_MAXID };
 struct list nchstatslist = { nchstatsname, KERN_NCHSTATS_MAXID };
 struct list ttylist = { ttysname, KERN_TTY_MAXID };
 struct list semlist = { semname, KERN_SEMINFO_MAXID };
@@ -1891,6 +1897,62 @@ sysctl_malloc(char *string, char **bufpp
                *typep = CTLTYPE_STRING;
                return (3);
        }
+       return (-1);
+}
+
+/*
+ * handle scheduler statistics: print one kern.schedstat counter; always returns -1
+ */
+int
+sysctl_schedstat(char *string, char **bufpp, int mib[], int flags, int *typep)
+{
+       static struct schedstat scs;    /* copy of the counters read via sysctl() */
+       static int keepvalue = 0;       /* set to 1 once scs has been fetched */
+       int indx;
+       size_t size;
+
+       if (*bufpp == NULL) {   /* presumably: no third-level name was given */
+               bzero(&scs, sizeof(scs));       /* NOTE(review): keepvalue is not reset here */
+               listall(string, &schedstatlist);
+               return (-1);
+       }
+       if ((indx = findname(string, "third", bufpp, &schedstatlist)) == -1)
+               return (-1);
+       if (*bufpp != NULL) {   /* presumably: text remains after the third-level name */
+               warnx("fourth level name in %s is invalid", string);
+               return (-1);
+       }
+       if (keepvalue == 0) {   /* first lookup: read the whole struct once and reuse it */
+               size = sizeof(scs);
+               if (sysctl(mib, 2, &scs, &size, NULL, 0) < 0)
+                       return (-1);
+               keepvalue = 1;
+       }
+       if (!nflag)     /* the "name" + equ prefix is skipped when nflag is set */
+               (void)printf("%s%s", string, equ);
+       switch (indx)   {       /* indx selects which field of scs is printed */
+       case KERN_SCHEDSTAT_NMIGRATIONS:
+               (void)printf("%llu\n", scs.scs_nmigrations);
+               break;
+       case KERN_SCHEDSTAT_NOIDLE:
+               (void)printf("%llu\n", scs.scs_noidle);
+               break;
+       case KERN_SCHEDSTAT_STOLEN:
+               (void)printf("%llu\n", scs.scs_stolen);
+               break;
+       case KERN_SCHEDSTAT_CHOOSE:
+               (void)printf("%llu\n", scs.scs_choose);
+               break;
+       case KERN_SCHEDSTAT_WASIDLE:
+               (void)printf("%llu\n", scs.scs_wasidle);
+               break;
+       case KERN_SCHEDSTAT_NOMIGRATIONS:
+               (void)printf("%llu\n", scs.scs_nomigrations);
+               break;
+       default:        /* any other index prints no value */
+               break;
+       }
+       
        return (-1);
 }

Reply via email to