Hi,
systat(1)'s vmstat view should not use statclock() ticks to count
elapsed time. First, ticks have very low resolution and aren't
always of equal length. Second, the view sums the ticks from every
CPU on the system, so the computed elapsed time is inflated by the
number of CPUs and every rate in the view ends up divided by that
number.
Instead, compute an elapsed time with clock_gettime(2). Prefer
CLOCK_UPTIME to CLOCK_MONOTONIC to exclude any time the system is
suspended. With this change we can remove "stathz", "hertz", and the
secondary clock failure test (the "alternate system clock has died"
check, along with MAXFAIL and failcnt).
For comparison, consider my laptop with 8 CPUs while it's building
clang.
Before:
7 users Load 11.96 8.89 4.08 jetsam.attlocal.net 18:05:37
memory totals (in KB) PAGING SWAPPING Interrupts
real virtual free in out in out 234 total
Active 1352876 1352876 10132884 ops acpi0
All 5974204 5974204 43687312 pages 1 inteldrm
8 xhci0
Proc:r d s w Csw Trp Sys Int Sof Flt forks 3 iwm0
9 157 27 2640 1320 13 35 2634 fkppw nvme0
fksvm azalia0
0.1%Int 0.6%Spn 3.4%Sys 95.9%Usr 0.0%Idle pwait pckbc0
| | | | | | | | | | | relck pckbc0
==>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> rlkok 200 clock
noram 22 ipi
Namei Sys-cache Proc-cache No-cache 210 ndcpy
Calls hits % hits % miss % fltcp
36633 35567 97 1066 3 2466 zfod
cow
Disks sd0 sd1 134225 fmin
seeks 178966 ftarg
xfers 2 itarg
speed 32K 11781 wired
sec 0.0 pdfre
pdscn
-2549 pzidl 2 IPKTS
37 kmape OPKTS
After:
7 users Load 10.80 8.87 4.21 jetsam.attlocal.net 18:06:04
memory totals (in KB) PAGING SWAPPING Interrupts
real virtual free in out in out 1867 total
Active 1920708 1920708 9534016 ops acpi0
All 6573072 6573072 43088444 pages 12 inteldrm
62 xhci0
Proc:r d s w Csw Trp Sys Int Sof Flt forks 21 iwm0
9 157 213 49039 6150 102 27349046 fkppw 7 nvme0
fksvm azalia0
0.0%Int 1.5%Spn 4.5%Sys 94.1%Usr 0.0%Idle pwait pckbc0
| | | | | | | | | | | 5 relck pckbc0
@==>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 5 rlkok 1600 clock
noram 165 ipi
Namei Sys-cache Proc-cache No-cache 1966 ndcpy
Calls hits % hits % miss % fltcp
26320 26198 100 122 0 48171 zfod
cow
Disks sd0 sd1 134225 fmin
seeks 178966 ftarg
xfers 7 itarg
speed 112K 12805 wired
sec 0.0 pdfre
pdscn
-45812 pzidl 2 IPKTS
36 kmape OPKTS
One standout difference is the "clock" interrupt rate. It is too low
by a factor of 8 (the number of CPUs) in the first view, which shows
200, and corrected in the second, which shows 1600.
CC jmatthew@ and dlg@, who apparently spent some time trying to debug
the wrong problem because of this.
ok?
Index: main.c
===================================================================
RCS file: /cvs/src/usr.bin/systat/main.c,v
retrieving revision 1.76
diff -u -p -r1.76 main.c
--- main.c 12 Jul 2021 15:09:20 -0000 1.76
+++ main.c 21 Nov 2022 00:15:12 -0000
@@ -65,7 +65,7 @@ double avenrun[3];
double naptime = 5.0;
int verbose = 1; /* to report kvm read errs */
int nflag = 1;
-int ut, hz, stathz;
+int ut, hz;
char hostname[HOST_NAME_MAX+1];
WINDOW *wnd;
int CMDLINE;
@@ -414,7 +414,6 @@ gethz(void)
mib[1] = KERN_CLOCKRATE;
if (sysctl(mib, 2, &cinf, &size, NULL, 0) == -1)
return;
- stathz = cinf.stathz;
hz = cinf.hz;
}
Index: systat.h
===================================================================
RCS file: /cvs/src/usr.bin/systat/systat.h,v
retrieving revision 1.24
diff -u -p -r1.24 systat.h
--- systat.h 18 Jan 2021 00:49:09 -0000 1.24
+++ systat.h 21 Nov 2022 00:15:12 -0000
@@ -58,7 +58,7 @@ extern kvm_t *kd;
extern long ntext;
extern int *dk_select;
extern int dk_ndrive;
-extern int hz, stathz;
+extern int hz;
extern double naptime;
extern size_t nhosts;
extern size_t nports;
Index: vmstat.c
===================================================================
RCS file: /cvs/src/usr.bin/systat/vmstat.c,v
retrieving revision 1.94
diff -u -p -r1.94 vmstat.c
--- vmstat.c 22 Feb 2022 17:35:01 -0000 1.94
+++ vmstat.c 21 Nov 2022 00:15:13 -0000
@@ -98,7 +98,6 @@ int select_vm(void);
int vm_keyboard_callback(int);
static time_t t;
-static float hertz;
static int nintr;
static long *intrloc;
static char **intrname;
@@ -170,7 +169,6 @@ initvmstat(void)
int mib[4], i;
size_t size;
- hertz = stathz;
if (!dkinit(1))
return(0);
@@ -323,7 +321,6 @@ labelkre(void)
Y(fld); \
putint((int)((float)s.fld/etime + 0.5), l, c, w); \
} while (0)
-#define MAXFAIL 5
static char cpuchar[] = { '|', '@', '=', '>', ' ' };
static char cpuorder[] = { CP_INTR, CP_SPIN, CP_SYS, CP_USER, CP_IDLE };
@@ -331,33 +328,27 @@ static char cpuorder[] = { CP_INTR, CP_S
void
showkre(void)
{
+ static struct timespec prev;
+ struct timespec elapsed, now;
float f1, f2;
int psiz;
u_int64_t inttotal, intcnt;
int i, l, c;
- static int failcnt = 0, first_run = 0;
+ static int first_run = 0;
double etime;
+ clock_gettime(CLOCK_UPTIME, &now);
+ timespecsub(&now, &prev, &elapsed);
+ prev = now;
if (state == TIME) {
if (!first_run) {
first_run = 1;
return;
}
}
- etime = 0;
- for (i = 0; i < CPUSTATES; i++) {
+ etime = elapsed.tv_sec + elapsed.tv_nsec / 1000000000.0;
+ for (i = 0; i < CPUSTATES; i++)
X(cpustats.cs_time);
- etime += s.cpustats.cs_time[i];
- }
- if (etime < 5.0) { /* < 5 ticks - ignore this trash */
- if (failcnt++ >= MAXFAIL) {
- error("The alternate system clock has died!");
- failcnt = 0;
- }
- return;
- }
- failcnt = 0;
- etime /= hertz;
inttotal = 0;
for (i = 0; i < nintr; i++) {
t = intcnt = s.intrcnt[i];