Back in the old CSRG times, in the 80s, before the introduction of
PCATCH and tsleep(9), PZERO was a clever hack.   The following comment
from sleep() in kern/kern_synch.c 3.1 explains it well:

        * The most important effect of pri is that when
        * pri<=PZERO a signal cannot disturb the sleep;
        * if pri>PZERO signals will be processed.

The reason for this distinction is better explained by a comment in
sys/vmmeter.h, still present in OpenBSD:

        u_int16_t t_dw;         /* jobs in ``disk wait'' (neg priority) */

In 2017, "neg priority" translates to a value of ``p_priority'' between 
0 and PZERO.  Back in the old times they were used for "short sleep".
That's why the original getloadavg(3) calculation always included processes
doing a short sleep in the number of running processes:

        if (p->p_pri < PZERO)
                nrun++;

That's why, to this day, we can still see a lot of (PZERO + 1) or
(PZERO - 1) in the tree.

In the 90s everything changed.  tsleep(9) and PCATCH arrived to create
more confusion about what a sleeping priority really means!  In the
version 7.7 of vm/vm_meter.c mckusick@ changed how running processes
were accounted for:

        if (p->p_pri <= PZERO && p->p_slptime == 0)
                nrun++;       

The ``p_slptime'' check means that long sleeping processes (> 1sec) were
no longer included in getloadavg(3).  So in 1990 the kernel already had
processes sleeping for a "long" time with a priority <= PZERO!  PZERO
was already a lie...

The diff below gets rid of this lie.  Stop counting sleeping processes
as "running".  There's no way to know if the process is sleeping for a
short period or not.  This eliminates some false positives, for example
when the idle thread or the softclock thread is accounted as running.

It also simplifies uvm_total() to just report the number of sleeping
threads.

While here, update vmstat(8) to reflect this change and make its output
fit in 80 columns.  Here's the new output:

 procs    memory       page                    disks    traps          cpu
 r   s   avm     fre  flt  re  pi  po  fr  sr sd0 sd1  int   sys   cs us sy id
 1  40  922M   3588M 3466   0   0   0   0   0  50  50  353 12810 1314  5  4 91

Index: sys/uvm/uvm_meter.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_meter.c,v
retrieving revision 1.36
diff -u -p -r1.36 uvm_meter.c
--- sys/uvm/uvm_meter.c 14 Mar 2015 03:38:53 -0000      1.36
+++ sys/uvm/uvm_meter.c 30 Apr 2017 16:37:08 -0000
@@ -108,14 +108,14 @@ uvm_loadav(struct loadavg *avg)
 
        LIST_FOREACH(p, &allproc, p_list) {
                switch (p->p_stat) {
+               case SSTOP:
                case SSLEEP:
-                       if (p->p_priority > PZERO || p->p_slptime > 1)
-                               continue;
-               /* FALLTHROUGH */
+                       break;
                case SRUN:
                case SONPROC:
                        if (p == p->p_cpu->ci_schedstate.spc_idleproc)
                                continue;
+               /* FALLTHROUGH */
                case SIDL:
                        nrun++;
                        if (p->p_cpu)
@@ -136,7 +136,7 @@ uvm_loadav(struct loadavg *avg)
                spc->spc_ldavg = (cexp[0] * spc->spc_ldavg +
                    nrun_cpu[CPU_INFO_UNIT(ci)] * FSCALE *
                    (FSCALE - cexp[0])) >> FSHIFT;
-       }               
+       }
 }
 
 /*
@@ -254,24 +254,19 @@ uvm_total(struct vmtotal *totalp)
 
        /* calculate process statistics */
        LIST_FOREACH(p, &allproc, p_list) {
-               if (p->p_flag & P_SYSTEM)
-                       continue;
                switch (p->p_stat) {
                case 0:
                        continue;
 
                case SSLEEP:
                case SSTOP:
-                       if (p->p_priority <= PZERO)
-                               totalp->t_dw++;
-                       else if (p->p_slptime < maxslp)
-                               totalp->t_sl++;
-                       if (p->p_slptime >= maxslp)
-                               continue;
+                       totalp->t_sl++;
                        break;
                case SRUN:
-               case SIDL:
                case SONPROC:
+                       if (p == p->p_cpu->ci_schedstate.spc_idleproc)
+                               continue;
+               case SIDL:
                        totalp->t_rq++;
                        if (p->p_stat == SIDL)
                                continue;
Index: usr.bin/vmstat/vmstat.8
===================================================================
RCS file: /cvs/src/usr.bin/vmstat/vmstat.8,v
retrieving revision 1.37
diff -u -p -r1.37 vmstat.8
--- usr.bin/vmstat/vmstat.8     30 Mar 2016 06:58:06 -0000      1.37
+++ usr.bin/vmstat/vmstat.8     30 Apr 2017 17:24:17 -0000
@@ -121,16 +121,11 @@ Information about the numbers of process
 .Bl -tag -width 4n -compact
 .It Li r
 in run queue
-.It Li b
-blocked for resources (I/O, paging, etc.)
-.It Li w
-runnable or short sleeper (< 20 secs) but swapped
+.It Li s
+sleeping
 .El
 .It Li memory
 Information about the usage of virtual and real memory.
-Virtual pages
-(reported in units of 1024 bytes) are considered active if they belong
-to processes which are running or have run in the last 20 seconds.
 .Pp
 .Bl -tag -width 4n -compact
 .It Li avm
Index: usr.bin/vmstat/vmstat.c
===================================================================
RCS file: /cvs/src/usr.bin/vmstat/vmstat.c,v
retrieving revision 1.141
diff -u -p -r1.141 vmstat.c
--- usr.bin/vmstat/vmstat.c     14 Aug 2016 22:47:26 -0000      1.141
+++ usr.bin/vmstat/vmstat.c     30 Apr 2017 17:20:51 -0000
@@ -358,13 +358,12 @@ dovmstat(u_int interval, int reps)
                        warn("could not read vm.vmmeter");
                        memset(&total, 0, sizeof(total));
                }
-               (void)printf(" %u %u %u ",
-                   total.t_rq - 1, total.t_dw + total.t_pw, total.t_sw);
+               (void)printf("%2u %3u", total.t_rq - 1, total.t_sl);
 #define        rate(x) ((unsigned)((((unsigned)x) + halfuptime) / uptime)) /* round */
 #define pgtok(a) ((a) * ((unsigned int)uvmexp.pagesize >> 10))
-               (void)printf("%6u %7u ",
-                   pgtok(uvmexp.active + uvmexp.swpginuse),
-                   pgtok(uvmexp.free));
+               (void)printf("%5uM %6uM ",
+                   pgtok(uvmexp.active + uvmexp.swpginuse) / 1024,
+                   pgtok(uvmexp.free) / 1024);
                (void)printf("%4u ", rate(uvmexp.faults - ouvmexp.faults));
                (void)printf("%3u ", rate(uvmexp.pdreact - ouvmexp.pdreact));
                (void)printf("%3u ", rate(uvmexp.pageins - ouvmexp.pageins));
@@ -410,7 +409,7 @@ printhdr(void)
                (void)printf("%*s  traps           cpu\n",
                   ndrives * 3, "");
 
-       (void)printf(" r b w    avm     fre  flt  re  pi  po  fr  sr ");
+       (void)printf(" r   s   avm     fre  flt  re  pi  po  fr  sr ");
        for (i = 0; i < dk_ndrive; i++)
                if (dk_select[i])
                        (void)printf("%c%c%c ", dr_name[i][0],

Reply via email to