This is the next patch in the clock interrupt reorganization series.
This patch moves the entry points for the interval and profile dt(4)
providers out of hardclock(9) and into a dedicated clock interrupt
callback, dt_prov_profile_intr(), in dev/dt/dt_prov_profile.c.
- To preserve current behavior, (1) both provider entry points have
been moved into a single callback function, (2) the interrupt runs at
the same frequency as the hardclock, and (3) the interrupt is
staggered to co-occur with the hardclock on a given CPU.
All of this can be changed later. This patch is strictly
logistical: moving these parts out of the hardclock.
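
For reference, the new callback boils down to the following (excerpted
from the dt_prov_profile.c diff below, with comments added):

	void
	dt_prov_profile_intr(struct clockintr *cl, void *cf)
	{
		uint64_t count, i;
		struct cpu_info *ci = curcpu();

		/* Reschedule at hardclock frequency; count the expirations. */
		count = clockintr_advance(cl, hardclock_period);
		for (i = 0; i < count; i++) {
			DT_ENTER(profile, NULL);
			/* The interval provider fires on the primary CPU only. */
			if (CPU_IS_PRIMARY(ci))
				DT_ENTER(interval, NULL);
		}
	}
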
- Each dt_pcb gets a provider-specific "dp_clockintr" pointer. If the
PCB's implementing provider needs a clock interrupt to do its work, it
stores the handle in dp_clockintr. The handle, if any, is established
during dtpv_alloc() and disestablished during dtpv_dealloc().
Only the interval and profile providers use it at present.
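
Condensed from the diff below, the lifecycle looks like this (error
unwinding elided):

	/* dtpv_alloc (dt_prov_profile_alloc): one handle per PCB. */
	dp->dp_clockintr = clockintr_establish(&ci->ci_queue,
	    dt_prov_profile_intr);
	clockintr_stagger(dp->dp_clockintr, hardclock_period,
	    CPU_INFO_UNIT(ci), MAXCPUS);

	/* dtpv_dealloc (dt_prov_profile_dealloc): tear it down again. */
	if (dp->dp_clockintr != NULL) {
		clockintr_disestablish(dp->dp_clockintr);
		dp->dp_clockintr = NULL;
	}
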
- In order to implement dt_prov_profile_dealloc() I needed a complete
definition of struct dt_softc, so I hoisted it up out of dt_dev.c
into dtvar.h.
- A PCB's periodic clock interrupt, if any, is started during
dt_ioctl_record_start() and stopped during dt_ioctl_record_stop().
This is not a provider-agnostic approach, but I don't see where
else to start/stop the clock interrupt.
One alternative is to start running the clock interrupts when they
are allocated in dtpv_alloc() and stop them when they are freed in
dtpv_dealloc(). This is wasteful, though: the PCBs are not recording
yet, so the interrupts won't perform any useful work until the
controlling process enables recording.
An additional pair of provider hooks, e.g. "dtpv_record_start" and
"dtpv_record_stop", might resolve this.
- We haven't needed to destroy clock interrupts yet, so the
clockintr_disestablish() function used in this patch is new.
The implementation is extremely similar to that of clockintr_cancel().
However, for the sake of simplicity, a callback may not disestablish
its own clockintr from within the callback.
One tricky part: if a clockintr is disestablished by another thread
while its callback is running, the dispatch thread takes care not to
use the freed handle after it re-enters the clockintr_queue mutex.
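
Concretely, clockintr_disestablish() clears cq_running when it frees
the running clockintr, so after the callback returns the dispatch
loop checks cq_running before it touches cl again (see the
kern_clockintr.c hunk below):

	mtx_enter(&cq->cq_mtx);
	if (cq->cq_running != NULL) {
		/* cl was not disestablished mid-callback: safe to use. */
		cq->cq_running = NULL;
		/* ... reschedule cl via the shadow copy ... */
	}
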
I have tested this on amd64. It seems to work: the clock interrupts
fire, stack traces are recorded, and the handles are deallocated when
the process closes the file descriptor.
I will be testing it on other platforms over the next few days.
Thoughts? Test results?
Index: kern/kern_clockintr.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_clockintr.c,v
retrieving revision 1.32
diff -u -p -r1.32 kern_clockintr.c
--- kern/kern_clockintr.c 21 Aug 2023 17:22:04 -0000 1.32
+++ kern/kern_clockintr.c 23 Aug 2023 23:35:11 -0000
@@ -218,7 +218,7 @@ clockintr_dispatch(void *frame)
{
uint64_t lateness, run = 0, start;
struct cpu_info *ci = curcpu();
- struct clockintr *cl;
+ struct clockintr *cl, *shadow;
struct clockintr_queue *cq = &ci->ci_queue;
u_int ogen;
@@ -257,22 +257,26 @@ clockintr_dispatch(void *frame)
break;
}
clockintr_cancel_locked(cl);
- cq->cq_shadow.cl_expiration = cl->cl_expiration;
+ shadow = &cq->cq_shadow;
+ shadow->cl_expiration = cl->cl_expiration;
+ shadow->cl_func = cl->cl_func;
cq->cq_running = cl;
mtx_leave(&cq->cq_mtx);
- cl->cl_func(&cq->cq_shadow, frame);
+ shadow->cl_func(shadow, frame);
mtx_enter(&cq->cq_mtx);
- cq->cq_running = NULL;
- if (ISSET(cl->cl_flags, CLST_IGNORE_SHADOW)) {
- CLR(cl->cl_flags, CLST_IGNORE_SHADOW);
- CLR(cq->cq_shadow.cl_flags, CLST_SHADOW_PENDING);
- }
- if (ISSET(cq->cq_shadow.cl_flags, CLST_SHADOW_PENDING)) {
- CLR(cq->cq_shadow.cl_flags, CLST_SHADOW_PENDING);
- clockintr_schedule_locked(cl,
- cq->cq_shadow.cl_expiration);
+ if (cq->cq_running != NULL) {
+ cq->cq_running = NULL;
+ if (ISSET(cl->cl_flags, CLST_IGNORE_SHADOW)) {
+ CLR(cl->cl_flags, CLST_IGNORE_SHADOW);
+ CLR(shadow->cl_flags, CLST_SHADOW_PENDING);
+ }
+ if (ISSET(shadow->cl_flags, CLST_SHADOW_PENDING)) {
+ CLR(shadow->cl_flags, CLST_SHADOW_PENDING);
+ clockintr_schedule_locked(cl,
+ shadow->cl_expiration);
+ }
}
run++;
}
@@ -382,6 +386,34 @@ clockintr_cancel_locked(struct clockintr
TAILQ_REMOVE(&cq->cq_pend, cl, cl_plink);
CLR(cl->cl_flags, CLST_PENDING);
+}
+
+void
+clockintr_disestablish(struct clockintr *cl)
+{
+ struct clockintr_queue *cq = cl->cl_queue;
+ int was_next;
+
+ if (cl == &cq->cq_shadow)
+ panic("%s: called during dispatch\n", __func__);
+
+ mtx_enter(&cq->cq_mtx);
+ if (ISSET(cl->cl_flags, CLST_PENDING)) {
+ was_next = cl == TAILQ_FIRST(&cq->cq_pend);
+ clockintr_cancel_locked(cl);
+ if (ISSET(cq->cq_flags, CQ_INTRCLOCK)) {
+ if (was_next && !TAILQ_EMPTY(&cq->cq_pend)) {
+ if (cq == &curcpu()->ci_queue)
+ clockqueue_reset_intrclock(cq);
+ }
+ }
+ }
+ if (cl == cq->cq_running)
+ cq->cq_running = NULL;
+ TAILQ_REMOVE(&cq->cq_est, cl, cl_elink);
+ mtx_leave(&cq->cq_mtx);
+
+ free(cl, M_DEVBUF, sizeof *cl);
}
struct clockintr *
Index: kern/kern_clock.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_clock.c,v
retrieving revision 1.115
diff -u -p -r1.115 kern_clock.c
--- kern/kern_clock.c 23 Aug 2023 01:55:45 -0000 1.115
+++ kern/kern_clock.c 23 Aug 2023 23:35:11 -0000
@@ -49,11 +49,6 @@
#include <sys/sched.h>
#include <sys/timetc.h>
-#include "dt.h"
-#if NDT > 0
-#include <dev/dt/dtvar.h>
-#endif
-
/*
* Clock handling routines.
*
@@ -114,12 +109,6 @@ initclocks(void)
void
hardclock(struct clockframe *frame)
{
-#if NDT > 0
- DT_ENTER(profile, NULL);
- if (CPU_IS_PRIMARY(curcpu()))
- DT_ENTER(interval, NULL);
-#endif
-
/*
* If we are not the primary CPU, we're not allowed to do
* any more work.
Index: sys/clockintr.h
===================================================================
RCS file: /cvs/src/sys/sys/clockintr.h,v
retrieving revision 1.10
diff -u -p -r1.10 clockintr.h
--- sys/clockintr.h 21 Aug 2023 17:22:04 -0000 1.10
+++ sys/clockintr.h 23 Aug 2023 23:35:11 -0000
@@ -128,6 +128,7 @@ void clockintr_trigger(void);
uint64_t clockintr_advance(struct clockintr *, uint64_t);
void clockintr_cancel(struct clockintr *);
+void clockintr_disestablish(struct clockintr *);
struct clockintr *clockintr_establish(struct clockintr_queue *,
void (*)(struct clockintr *, void *));
void clockintr_stagger(struct clockintr *, uint64_t, u_int, u_int);
Index: dev/dt/dt_prov_profile.c
===================================================================
RCS file: /cvs/src/sys/dev/dt/dt_prov_profile.c,v
retrieving revision 1.5
diff -u -p -r1.5 dt_prov_profile.c
--- dev/dt/dt_prov_profile.c 26 Apr 2023 16:53:59 -0000 1.5
+++ dev/dt/dt_prov_profile.c 23 Aug 2023 23:35:13 -0000
@@ -20,6 +20,7 @@
#include <sys/systm.h>
#include <sys/param.h>
#include <sys/atomic.h>
+#include <sys/clockintr.h>
#include <dev/dt/dtvar.h>
@@ -31,6 +32,8 @@ struct dt_probe *dtpp_interval; /* glob
int dt_prov_profile_alloc(struct dt_probe *, struct dt_softc *,
struct dt_pcb_list *, struct dtioc_req *);
+int dt_prov_profile_dealloc(struct dt_probe *, struct dt_softc *,
+ struct dtioc_req *);
int dt_prov_profile_enter(struct dt_provider *, ...);
int dt_prov_interval_enter(struct dt_provider *, ...);
@@ -39,7 +42,7 @@ struct dt_provider dt_prov_profile = {
.dtpv_alloc = dt_prov_profile_alloc,
.dtpv_enter = dt_prov_profile_enter,
.dtpv_leave = NULL,
- .dtpv_dealloc = NULL,
+ .dtpv_dealloc = dt_prov_profile_dealloc,
};
struct dt_provider dt_prov_interval = {
@@ -47,9 +50,11 @@ struct dt_provider dt_prov_interval = {
.dtpv_alloc = dt_prov_profile_alloc,
.dtpv_enter = dt_prov_interval_enter,
.dtpv_leave = NULL,
- .dtpv_dealloc = NULL,
+ .dtpv_dealloc = dt_prov_profile_dealloc,
};
+void dt_prov_profile_intr(struct clockintr *, void *);
+
int
dt_prov_profile_init(void)
{
@@ -85,10 +90,17 @@ dt_prov_profile_alloc(struct dt_probe *d
continue;
dp = dt_pcb_alloc(dtp, sc);
- if (dp == NULL) {
- dt_pcb_purge(plist);
- return ENOMEM;
+ if (dp == NULL)
+ goto purge;
+
+ dp->dp_clockintr = clockintr_establish(&ci->ci_queue,
+ dt_prov_profile_intr);
+ if (dp->dp_clockintr == NULL) {
+ dt_pcb_free(dp);
+ goto purge;
}
+ clockintr_stagger(dp->dp_clockintr, hardclock_period,
+ CPU_INFO_UNIT(ci), MAXCPUS);
dp->dp_maxtick = hz / dtrq->dtrq_rate;
dp->dp_cpuid = ci->ci_cpuid;
@@ -99,6 +111,31 @@ dt_prov_profile_alloc(struct dt_probe *d
}
return 0;
+purge:
+ TAILQ_FOREACH(dp, plist, dp_snext)
+ clockintr_disestablish(dp->dp_clockintr);
+ dt_pcb_purge(plist);
+ return ENOMEM;
+}
+
+int
+dt_prov_profile_dealloc(struct dt_probe *dtp, struct dt_softc *sc,
+ struct dtioc_req *dtrq)
+{
+ struct dt_pcb *dp;
+
+ KASSERT(dtioc_req_isvalid(dtrq));
+ KASSERT(dtp == dtpp_profile || dtp == dtpp_interval);
+
+ TAILQ_FOREACH(dp, &sc->ds_pcbs, dp_snext) {
+ if (dp->dp_dtp == dtp) {
+ if (dp->dp_clockintr != NULL) {
+ clockintr_disestablish(dp->dp_clockintr);
+ dp->dp_clockintr = NULL;
+ }
+ }
+ }
+ return 0;
}
static inline void
@@ -148,4 +185,18 @@ dt_prov_interval_enter(struct dt_provide
}
smr_read_leave();
return 0;
+}
+
+void
+dt_prov_profile_intr(struct clockintr *cl, void *cf)
+{
+ uint64_t count, i;
+ struct cpu_info *ci = curcpu();
+
+ count = clockintr_advance(cl, hardclock_period);
+ for (i = 0; i < count; i++) {
+ DT_ENTER(profile, NULL);
+ if (CPU_IS_PRIMARY(ci))
+ DT_ENTER(interval, NULL);
+ }
}
Index: dev/dt/dt_dev.c
===================================================================
RCS file: /cvs/src/sys/dev/dt/dt_dev.c,v
retrieving revision 1.28
diff -u -p -r1.28 dt_dev.c
--- dev/dt/dt_dev.c 14 Jul 2023 07:07:08 -0000 1.28
+++ dev/dt/dt_dev.c 23 Aug 2023 23:35:14 -0000
@@ -19,6 +19,7 @@
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/param.h>
+#include <sys/clockintr.h>
#include <sys/device.h>
#include <sys/exec_elf.h>
#include <sys/malloc.h>
@@ -82,32 +83,6 @@
#define DPRINTF(x...) /* nothing */
-/*
- * Descriptor associated with each program opening /dev/dt. It is used
- * to keep track of enabled PCBs.
- *
- * Locks used to protect struct members in this file:
- * m per-softc mutex
- * K kernel lock
- */
-struct dt_softc {
- SLIST_ENTRY(dt_softc) ds_next; /* [K] descriptor list */
- int ds_unit; /* [I] D_CLONE unique unit */
- pid_t ds_pid; /* [I] PID of tracing program */
-
- struct mutex ds_mtx;
-
- struct dt_pcb_list ds_pcbs; /* [K] list of enabled PCBs */
- struct dt_evt *ds_bufqueue; /* [K] copy evts to userland */
- size_t ds_bufqlen; /* [K] length of the queue */
- int ds_recording; /* [K] currently recording? */
- int ds_evtcnt; /* [m] # of readable evts */
-
- /* Counters */
- uint64_t ds_readevt; /* [m] # of events read */
- uint64_t ds_dropevt; /* [m] # of events dropped */
-};
-
SLIST_HEAD(, dt_softc) dtdev_list; /* [K] list of open /dev/dt nodes */
/*
@@ -486,6 +461,14 @@ dt_ioctl_record_start(struct dt_softc *s
SMR_SLIST_INSERT_HEAD_LOCKED(&dtp->dtp_pcbs, dp, dp_pnext);
dtp->dtp_recording++;
dtp->dtp_prov->dtpv_recording++;
+
+ /*
+ * XXX Not provider-agnostic. Is there a better place
+ * to do this?
+ */
+ if (dp->dp_clockintr != NULL)
+ clockintr_advance(dp->dp_clockintr, hardclock_period);
+
}
rw_exit_write(&dt_lock);
@@ -514,6 +497,11 @@ dt_ioctl_record_stop(struct dt_softc *sc
dtp->dtp_recording--;
dtp->dtp_prov->dtpv_recording--;
+
+ /* XXX This is not provider-agnostic. */
+ if (dp->dp_clockintr != NULL)
+ clockintr_cancel(dp->dp_clockintr);
+
SMR_SLIST_REMOVE_LOCKED(&dtp->dtp_pcbs, dp, dt_pcb, dp_pnext);
}
rw_exit_write(&dt_lock);
Index: dev/dt/dtvar.h
===================================================================
RCS file: /cvs/src/sys/dev/dt/dtvar.h,v
retrieving revision 1.17
diff -u -p -r1.17 dtvar.h
--- dev/dt/dtvar.h 26 Apr 2023 16:53:59 -0000 1.17
+++ dev/dt/dtvar.h 23 Aug 2023 23:35:14 -0000
@@ -158,12 +158,39 @@ struct dtioc_getaux {
#include <sys/queue.h>
#include <sys/smr.h>
+struct dt_pcb;
+TAILQ_HEAD(dt_pcb_list, dt_pcb);
+
/* Flags that make sense for all providers. */
#define DTEVT_COMMON (DTEVT_EXECNAME|DTEVT_KSTACK|DTEVT_USTACK)
#define M_DT M_DEVBUF /* XXX FIXME */
-struct dt_softc;
+/*
+ * Descriptor associated with each program opening /dev/dt. It is used
+ * to keep track of enabled PCBs.
+ *
+ * Locks used to protect struct members in this file:
+ * m per-softc mutex
+ * K kernel lock
+ */
+struct dt_softc {
+ SLIST_ENTRY(dt_softc) ds_next; /* [K] descriptor list */
+ int ds_unit; /* [I] D_CLONE unique unit */
+ pid_t ds_pid; /* [I] PID of tracing program */
+
+ struct mutex ds_mtx;
+
+ struct dt_pcb_list ds_pcbs; /* [K] list of enabled PCBs */
+ struct dt_evt *ds_bufqueue; /* [K] copy evts to userland */
+ size_t ds_bufqlen; /* [K] length of the queue */
+ int ds_recording; /* [K] currently recording? */
+ int ds_evtcnt; /* [m] # of readable evts */
+
+ /* Counters */
+ uint64_t ds_readevt; /* [m] # of events read */
+ uint64_t ds_dropevt; /* [m] # of events dropped */
+};
int dtioc_req_isvalid(struct dtioc_req *);
@@ -197,6 +224,7 @@ struct dt_pcb {
struct dt_filter dp_filter; /* [I] filter to match */
/* Provider specific fields. */
+ struct clockintr *dp_clockintr; /* [?] clock interrupt handle */
unsigned int dp_cpuid; /* [I] on which CPU */
unsigned int dp_maxtick; /* [I] freq. of profiling */
unsigned int dp_nticks; /* [c] current tick count */
@@ -204,8 +232,6 @@ struct dt_pcb {
/* Counters */
uint64_t dp_dropevt; /* [m] # dropped event */
};
-
-TAILQ_HEAD(dt_pcb_list, dt_pcb);
struct dt_pcb *dt_pcb_alloc(struct dt_probe *, struct dt_softc *);
void dt_pcb_free(struct dt_pcb *);