On Wed, Jul 26, 2023 at 11:16:19AM -0500, Scott Cheloha wrote:
> This is the next patch in the clock interrupt reorganization series.
>
> Now that statclock() is cleaned up we can turn to hardclock().
>
> [...]
>
> This patch moves the setitimer(2) code out of hardclock(). The big
> idea is identical to what we did with profil(2)/profclock in the
> profclock/gmonclock patch.
>
> - Move the setitimer(2) polling code from hardclock() to a new clock
> interrupt routine, itimer_update(), in kern_time.c. itimer_update()
> is periodic and runs at the same frequency as the hardclock.
>
> - Each schedstate_percpu has its own itimer_update() handle, spc_itimer,
> initialized during sched_init_cpu().
>
> - The itimer_update() on a given CPU is enabled/disabled in
> mi_switch()/sched_exit() if the running thread's process has enabled
> ITIMER_VIRTUAL/ITIMER_PROF. A new scheduler flag, SPCF_ITIMER,
> signifies whether itimer_update() was started and needs stopping.
>
> - A new per-process flag, PS_ITIMER, signifies whether any virtual
> interval timers are running. The flag is updated from the helper
> routine process_reset_itimer_flag(). We use it during mi_switch()
> to decide whether to start itimer_update() without entering itimer_mtx.
>
> - In setitimer(), call need_resched() when the process changes the
> state of ITIMER_VIRTUAL/ITIMER_PROF to force itimer_update() on/off.
>
> regress/sys/kern/itimer passes.
Updated patch:
- Rebase on kern_clockintr.c,v1.29 and kern_sched.c,v1.81
- Stagger spc_itimer in clockintr_cpu_init() alongside spc_profclock
until I can figure out where else to do it
ok?
Index: kern/kern_clock.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_clock.c,v
retrieving revision 1.109
diff -u -p -r1.109 kern_clock.c
--- kern/kern_clock.c 25 Jul 2023 18:16:19 -0000 1.109
+++ kern/kern_clock.c 28 Jul 2023 03:44:16 -0000
@@ -105,41 +105,12 @@ initclocks(void)
}
/*
- * hardclock does the accounting needed for ITIMER_PROF and ITIMER_VIRTUAL.
- * We don't want to send signals with psignal from hardclock because it makes
- * MULTIPROCESSOR locking very complicated. Instead, to use an idea from
- * FreeBSD, we set a flag on the thread and when it goes to return to
- * userspace it signals itself.
- */
-
-/*
* The real-time timer, interrupting hz times per second.
*/
void
hardclock(struct clockframe *frame)
{
- struct proc *p;
struct cpu_info *ci = curcpu();
-
- p = curproc;
- if (p && ((p->p_flag & (P_SYSTEM | P_WEXIT)) == 0)) {
- struct process *pr = p->p_p;
-
- /*
- * Run current process's virtual and profile time, as needed.
- */
- if (CLKF_USERMODE(frame) &&
- timespecisset(&pr->ps_timer[ITIMER_VIRTUAL].it_value) &&
- itimerdecr(&pr->ps_timer[ITIMER_VIRTUAL], tick_nsec) == 0) {
- atomic_setbits_int(&p->p_flag, P_ALRMPEND);
- need_proftick(p);
- }
- if (timespecisset(&pr->ps_timer[ITIMER_PROF].it_value) &&
- itimerdecr(&pr->ps_timer[ITIMER_PROF], tick_nsec) == 0) {
- atomic_setbits_int(&p->p_flag, P_PROFPEND);
- need_proftick(p);
- }
- }
if (--ci->ci_schedstate.spc_rrticks <= 0)
roundrobin(ci);
Index: kern/kern_time.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_time.c,v
retrieving revision 1.163
diff -u -p -r1.163 kern_time.c
--- kern/kern_time.c 15 Feb 2023 10:07:50 -0000 1.163
+++ kern/kern_time.c 28 Jul 2023 03:44:16 -0000
@@ -35,6 +35,7 @@
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/systm.h>
+#include <sys/clockintr.h>
#include <sys/mutex.h>
#include <sys/rwlock.h>
#include <sys/proc.h>
@@ -52,6 +53,7 @@
#include <dev/clock_subr.h>
int itimerfix(struct itimerval *);
+void process_reset_itimer_flag(struct process *);
/*
* Time of day and interval timer support.
@@ -551,6 +553,10 @@ setitimer(int which, const struct itimer
timeout_del(&pr->ps_realit_to);
}
*itimer = its;
+ if (which == ITIMER_VIRTUAL || which == ITIMER_PROF) {
+ process_reset_itimer_flag(pr);
+ need_resched(curcpu());
+ }
}
if (which == ITIMER_REAL)
@@ -729,47 +735,70 @@ itimerfix(struct itimerval *itv)
}
/*
- * Decrement an interval timer by the given number of nanoseconds.
+ * Decrement an interval timer by the given duration.
* If the timer expires and it is periodic then reload it. When reloading
* the timer we subtract any overrun from the next period so that the timer
* does not drift.
*/
int
-itimerdecr(struct itimerspec *itp, long nsec)
+itimerdecr(struct itimerspec *itp, const struct timespec *decrement)
{
- struct timespec decrement;
-
- NSEC_TO_TIMESPEC(nsec, &decrement);
-
- mtx_enter(&itimer_mtx);
-
- /*
- * Double-check that the timer is enabled. A different thread
- * in setitimer(2) may have disabled it while we were entering
- * the mutex.
- */
- if (!timespecisset(&itp->it_value)) {
- mtx_leave(&itimer_mtx);
- return (1);
- }
-
- /*
- * The timer is enabled. Update and reload it as needed.
- */
- timespecsub(&itp->it_value, &decrement, &itp->it_value);
- if (itp->it_value.tv_sec >= 0 && timespecisset(&itp->it_value)) {
- mtx_leave(&itimer_mtx);
+ timespecsub(&itp->it_value, decrement, &itp->it_value);
+ if (itp->it_value.tv_sec >= 0 && timespecisset(&itp->it_value))
return (1);
- }
if (!timespecisset(&itp->it_interval)) {
timespecclear(&itp->it_value);
- mtx_leave(&itimer_mtx);
return (0);
}
while (itp->it_value.tv_sec < 0 || !timespecisset(&itp->it_value))
timespecadd(&itp->it_value, &itp->it_interval, &itp->it_value);
- mtx_leave(&itimer_mtx);
return (0);
+}
+
+void
+itimer_update(struct clockintr *cl, void *cf)
+{
+ struct timespec elapsed;
+ uint64_t nsecs;
+ struct clockframe *frame = cf;
+ struct proc *p = curproc;
+ struct process *pr;
+
+ if (p == NULL || ISSET(p->p_flag, P_SYSTEM | P_WEXIT))
+ return;
+
+ pr = p->p_p;
+ if (!ISSET(pr->ps_flags, PS_ITIMER))
+ return;
+
+ nsecs = clockintr_advance(cl, hardclock_period) * hardclock_period;
+ NSEC_TO_TIMESPEC(nsecs, &elapsed);
+
+ mtx_enter(&itimer_mtx);
+ if (CLKF_USERMODE(frame) &&
+ timespecisset(&pr->ps_timer[ITIMER_VIRTUAL].it_value) &&
+ itimerdecr(&pr->ps_timer[ITIMER_VIRTUAL], &elapsed) == 0) {
+ process_reset_itimer_flag(pr);
+ atomic_setbits_int(&p->p_flag, P_ALRMPEND);
+ need_proftick(p);
+ }
+ if (timespecisset(&pr->ps_timer[ITIMER_PROF].it_value) &&
+ itimerdecr(&pr->ps_timer[ITIMER_PROF], &elapsed) == 0) {
+ process_reset_itimer_flag(pr);
+ atomic_setbits_int(&p->p_flag, P_PROFPEND);
+ need_proftick(p);
+ }
+ mtx_leave(&itimer_mtx);
+}
+
+void
+process_reset_itimer_flag(struct process *ps)
+{
+ if (timespecisset(&ps->ps_timer[ITIMER_VIRTUAL].it_value) ||
+ timespecisset(&ps->ps_timer[ITIMER_PROF].it_value))
+ atomic_setbits_int(&ps->ps_flags, PS_ITIMER);
+ else
+ atomic_clearbits_int(&ps->ps_flags, PS_ITIMER);
}
struct mutex ratecheck_mtx = MUTEX_INITIALIZER(IPL_HIGH);
Index: kern/kern_clockintr.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_clockintr.c,v
retrieving revision 1.29
diff -u -p -r1.29 kern_clockintr.c
--- kern/kern_clockintr.c 27 Jul 2023 17:52:53 -0000 1.29
+++ kern/kern_clockintr.c 28 Jul 2023 03:44:16 -0000
@@ -196,6 +196,10 @@ clockintr_cpu_init(const struct intrcloc
* XXX Need to find a better place to do this. We can't do it in
* sched_init_cpu() because initclocks() runs after it.
*/
+ if (spc->spc_itimer->cl_expiration == 0) {
+ clockintr_stagger(spc->spc_itimer, hardclock_period,
+ multiplier, MAXCPUS);
+ }
if (spc->spc_profclock->cl_expiration == 0) {
clockintr_stagger(spc->spc_profclock, profclock_period,
multiplier, MAXCPUS);
Index: kern/kern_sched.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_sched.c,v
retrieving revision 1.81
diff -u -p -r1.81 kern_sched.c
--- kern/kern_sched.c 27 Jul 2023 17:52:53 -0000 1.81
+++ kern/kern_sched.c 28 Jul 2023 03:44:16 -0000
@@ -87,6 +87,14 @@ sched_init_cpu(struct cpu_info *ci)
spc->spc_idleproc = NULL;
+ if (spc->spc_itimer == NULL) {
+ spc->spc_itimer = clockintr_establish(&ci->ci_queue,
+ itimer_update);
+ if (spc->spc_itimer == NULL) {
+ panic("%s: clockintr_establish itimer_update",
+ __func__);
+ }
+ }
if (spc->spc_profclock == NULL) {
spc->spc_profclock = clockintr_establish(&ci->ci_queue,
profclock);
@@ -223,6 +231,10 @@ sched_exit(struct proc *p)
timespecsub(&ts, &spc->spc_runtime, &ts);
timespecadd(&p->p_rtime, &ts, &p->p_rtime);
+ if (ISSET(spc->spc_schedflags, SPCF_ITIMER)) {
+ atomic_clearbits_int(&spc->spc_schedflags, SPCF_ITIMER);
+ clockintr_cancel(spc->spc_itimer);
+ }
if (ISSET(spc->spc_schedflags, SPCF_PROFCLOCK)) {
atomic_clearbits_int(&spc->spc_schedflags, SPCF_PROFCLOCK);
clockintr_cancel(spc->spc_profclock);
Index: kern/sched_bsd.c
===================================================================
RCS file: /cvs/src/sys/kern/sched_bsd.c,v
retrieving revision 1.78
diff -u -p -r1.78 sched_bsd.c
--- kern/sched_bsd.c 25 Jul 2023 18:16:19 -0000 1.78
+++ kern/sched_bsd.c 28 Jul 2023 03:44:16 -0000
@@ -350,7 +350,11 @@ mi_switch(void)
/* add the time counts for this thread to the process's total */
tuagg_unlocked(pr, p);
- /* Stop the profclock if it's running. */
+ /* Stop any optional clock interrupts. */
+ if (ISSET(spc->spc_schedflags, SPCF_ITIMER)) {
+ atomic_clearbits_int(&spc->spc_schedflags, SPCF_ITIMER);
+ clockintr_cancel(spc->spc_itimer);
+ }
if (ISSET(spc->spc_schedflags, SPCF_PROFCLOCK)) {
atomic_clearbits_int(&spc->spc_schedflags, SPCF_PROFCLOCK);
clockintr_cancel(spc->spc_profclock);
@@ -400,7 +404,13 @@ mi_switch(void)
*/
KASSERT(p->p_cpu == curcpu());
- /* Start the profclock if profil(2) is enabled. */
+ /* Start any optional clock interrupts needed by the thread. */
+ if (ISSET(p->p_p->ps_flags, PS_ITIMER)) {
+ atomic_setbits_int(&p->p_cpu->ci_schedstate.spc_schedflags,
+ SPCF_ITIMER);
+ clockintr_advance(p->p_cpu->ci_schedstate.spc_itimer,
+ hardclock_period);
+ }
if (ISSET(p->p_p->ps_flags, PS_PROFIL)) {
atomic_setbits_int(&p->p_cpu->ci_schedstate.spc_schedflags,
SPCF_PROFCLOCK);
Index: sys/time.h
===================================================================
RCS file: /cvs/src/sys/sys/time.h,v
retrieving revision 1.63
diff -u -p -r1.63 time.h
--- sys/time.h 13 Dec 2022 17:30:36 -0000 1.63
+++ sys/time.h 28 Jul 2023 03:44:16 -0000
@@ -330,8 +330,10 @@ uint64_t getnsecuptime(void);
struct proc;
int clock_gettime(struct proc *, clockid_t, struct timespec *);
+struct clockintr;
+void itimer_update(struct clockintr *, void *);
+
void cancel_all_itimers(void);
-int itimerdecr(struct itimerspec *, long);
int settime(const struct timespec *);
int ratecheck(struct timeval *, const struct timeval *);
int ppsratecheck(struct timeval *, int *, int);
Index: sys/sched.h
===================================================================
RCS file: /cvs/src/sys/sys/sched.h,v
retrieving revision 1.58
diff -u -p -r1.58 sched.h
--- sys/sched.h 25 Jul 2023 18:16:19 -0000 1.58
+++ sys/sched.h 28 Jul 2023 03:44:16 -0000
@@ -107,6 +107,7 @@ struct schedstate_percpu {
u_char spc_curpriority; /* usrpri of curproc */
int spc_rrticks; /* ticks until roundrobin() */
+ struct clockintr *spc_itimer; /* [o] itimer_update handle */
struct clockintr *spc_profclock; /* [o] profclock handle */
u_int spc_nrun; /* procs on the run queues */
@@ -139,6 +140,7 @@ struct cpustats {
#define SPCF_SHOULDHALT 0x0004 /* CPU should be vacated */
#define SPCF_HALTED 0x0008 /* CPU has been halted */
#define SPCF_PROFCLOCK 0x0010 /* profclock() was started */
+#define SPCF_ITIMER 0x0020 /* itimer_update() was started */
#define SCHED_PPQ	(128 / SCHED_NQS)	/* priorities per queue */
#define NICE_WEIGHT 2 /* priorities per nice level */
Index: sys/proc.h
===================================================================
RCS file: /cvs/src/sys/sys/proc.h,v
retrieving revision 1.346
diff -u -p -r1.346 proc.h
--- sys/proc.h 14 Jul 2023 07:07:08 -0000 1.346
+++ sys/proc.h 28 Jul 2023 03:44:16 -0000
@@ -282,6 +282,7 @@ struct process {
#define	PS_ORPHAN	0x00800000	/* Process is on an orphan list */
#define PS_CHROOT 0x01000000 /* Process is chrooted */
#define PS_NOBTCFI 0x02000000 /* No Branch Target CFI */
+#define	PS_ITIMER	0x04000000	/* Virtual interval timers running */
#define PS_BITS \
("\20" "\01CONTROLT" "\02EXEC" "\03INEXEC" "\04EXITING" "\05SUGID" \
@@ -289,7 +290,7 @@ struct process {
"\013WAITED" "\014COREDUMP" "\015SINGLEEXIT" "\016SINGLEUNWIND" \
"\017NOZOMBIE" "\020STOPPED" "\021SYSTEM" "\022EMBRYO" "\023ZOMBIE" \
"\024NOBROADCASTKILL" "\025PLEDGE" "\026WXNEEDED" "\027EXECPLEDGE" \
- "\030ORPHAN" "\031CHROOT" "\032NOBTCFI")
+ "\030ORPHAN" "\031CHROOT" "\032NOBTCFI" "\033ITIMER")
struct kcov_dev;
Index: sys/systm.h
===================================================================
RCS file: /cvs/src/sys/sys/systm.h,v
retrieving revision 1.163
diff -u -p -r1.163 systm.h
--- sys/systm.h 14 Jul 2023 07:07:08 -0000 1.163
+++ sys/systm.h 28 Jul 2023 03:44:16 -0000
@@ -233,6 +233,8 @@ int tvtohz(const struct timeval *);
int tstohz(const struct timespec *);
void realitexpire(void *);
+extern uint32_t hardclock_period;
+
struct clockframe;
void hardclock(struct clockframe *);
void statclock(struct clockframe *);