Module Name: src
Committed By: ryo
Date: Thu Dec 1 00:32:52 UTC 2022
Modified Files:
src/sys/arch/aarch64/include: armreg.h
src/sys/dev/tprof: tprof.c tprof.h tprof_armv7.c tprof_armv8.c
tprof_ioctl.h tprof_types.h tprof_x86.c tprof_x86_amd.c
tprof_x86_intel.c
src/usr.sbin/tprof: tprof.8 tprof.c tprof_analyze.c
Log Message:
Improve tprof(4)
- Multiple events can now be handled simultaneously.
- Counters should be configured with TPROF_IOC_CONFIGURE_EVENT in advance,
instead of being configured at TPROF_IOC_START.
- The configured counters can be started and stopped repeatedly by
TPROF_IOC_START/TPROF_IOC_STOP.
- The value of the performance counter can be obtained at any time as a 64bit
value with TPROF_IOC_GETCOUNTS.
- Backend common parts are handled in tprof.c as much as possible, and functions
on the tprof_backend side have been reimplemented to be more primitive.
- The reset value of counter overflows for profiling can now be adjusted.
By default it is calculated from the CPU clock (the speed of the cycle
counter) and TPROF_HZ, but for some events this default may be too large
to be useful for profiling. When configuring an event counter, the reset
value can therefore be specified either as a ratio to the default or as
an absolute value.
- Due to overall changes, API and ABI have been changed. TPROF_VERSION and
TPROF_BACKEND_VERSION were updated.
To generate a diff of this commit:
cvs rdiff -u -r1.62 -r1.63 src/sys/arch/aarch64/include/armreg.h
cvs rdiff -u -r1.18 -r1.19 src/sys/dev/tprof/tprof.c
cvs rdiff -u -r1.6 -r1.7 src/sys/dev/tprof/tprof.h
cvs rdiff -u -r1.9 -r1.10 src/sys/dev/tprof/tprof_armv7.c
cvs rdiff -u -r1.17 -r1.18 src/sys/dev/tprof/tprof_armv8.c
cvs rdiff -u -r1.4 -r1.5 src/sys/dev/tprof/tprof_ioctl.h \
src/sys/dev/tprof/tprof_x86_intel.c
cvs rdiff -u -r1.5 -r1.6 src/sys/dev/tprof/tprof_types.h \
src/sys/dev/tprof/tprof_x86_amd.c
cvs rdiff -u -r1.1 -r1.2 src/sys/dev/tprof/tprof_x86.c
cvs rdiff -u -r1.16 -r1.17 src/usr.sbin/tprof/tprof.8
cvs rdiff -u -r1.13 -r1.14 src/usr.sbin/tprof/tprof.c
cvs rdiff -u -r1.5 -r1.6 src/usr.sbin/tprof/tprof_analyze.c
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/sys/arch/aarch64/include/armreg.h
diff -u src/sys/arch/aarch64/include/armreg.h:1.62 src/sys/arch/aarch64/include/armreg.h:1.63
--- src/sys/arch/aarch64/include/armreg.h:1.62 Thu Dec 1 00:29:10 2022
+++ src/sys/arch/aarch64/include/armreg.h Thu Dec 1 00:32:52 2022
@@ -1,4 +1,4 @@
-/* $NetBSD: armreg.h,v 1.62 2022/12/01 00:29:10 ryo Exp $ */
+/* $NetBSD: armreg.h,v 1.63 2022/12/01 00:32:52 ryo Exp $ */
/*-
* Copyright (c) 2014 The NetBSD Foundation, Inc.
@@ -248,6 +248,10 @@ AARCH64REG_READ_INLINE(id_aa64dfr0_el1)
#define ID_AA64DFR0_EL1_PMUVER_NONE 0
#define ID_AA64DFR0_EL1_PMUVER_V3 1
#define ID_AA64DFR0_EL1_PMUVER_NOV3 2
+#define ID_AA64DFR0_EL1_PMUVER_V3P1 4
+#define ID_AA64DFR0_EL1_PMUVER_V3P4 5
+#define ID_AA64DFR0_EL1_PMUVER_V3P5 6
+#define ID_AA64DFR0_EL1_PMUVER_V3P7 7
#define ID_AA64DFR0_EL1_PMUVER_IMPL 15
#define ID_AA64DFR0_EL1_TRACEVER __BITS(4,7)
#define ID_AA64DFR0_EL1_TRACEVER_NONE 0
@@ -1221,6 +1225,7 @@ AARCH64REG_WRITE_INLINE(pmcr_el0)
#define PMCR_IMP __BITS(31,24) // Implementor code
#define PMCR_IDCODE __BITS(23,16) // Identification code
#define PMCR_N __BITS(15,11) // Number of event counters
+#define PMCR_LP __BIT(7) // Long event counter enable
#define PMCR_LC __BIT(6) // Long cycle counter enable
#define PMCR_DP __BIT(5) // Disable cycle counter when event
// counting is prohibited
Index: src/sys/dev/tprof/tprof.c
diff -u src/sys/dev/tprof/tprof.c:1.18 src/sys/dev/tprof/tprof.c:1.19
--- src/sys/dev/tprof/tprof.c:1.18 Thu Dec 1 00:27:59 2022
+++ src/sys/dev/tprof/tprof.c Thu Dec 1 00:32:52 2022
@@ -1,4 +1,4 @@
-/* $NetBSD: tprof.c,v 1.18 2022/12/01 00:27:59 ryo Exp $ */
+/* $NetBSD: tprof.c,v 1.19 2022/12/01 00:32:52 ryo Exp $ */
/*-
* Copyright (c)2008,2009,2010 YAMAMOTO Takashi,
@@ -27,7 +27,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: tprof.c,v 1.18 2022/12/01 00:27:59 ryo Exp $");
+__KERNEL_RCSID(0, "$NetBSD: tprof.c,v 1.19 2022/12/01 00:32:52 ryo Exp $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -42,12 +42,17 @@ __KERNEL_RCSID(0, "$NetBSD: tprof.c,v 1.
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/workqueue.h>
+#include <sys/xcall.h>
#include <dev/tprof/tprof.h>
#include <dev/tprof/tprof_ioctl.h>
#include "ioconf.h"
+#ifndef TPROF_HZ
+#define TPROF_HZ 10000
+#endif
+
/*
* locking order:
* tprof_reader_lock -> tprof_lock
@@ -73,7 +78,7 @@ typedef struct tprof_buf {
} tprof_buf_t;
#define TPROF_BUF_BYTESIZE(sz) \
(sizeof(tprof_buf_t) + (sz) * sizeof(tprof_sample_t))
-#define TPROF_MAX_SAMPLES_PER_BUF 10000
+#define TPROF_MAX_SAMPLES_PER_BUF (TPROF_HZ * 2)
#define TPROF_MAX_BUF 100
@@ -85,14 +90,20 @@ typedef struct {
} __aligned(CACHE_LINE_SIZE) tprof_cpu_t;
typedef struct tprof_backend {
+ /*
+ * tprof_backend_softc_t must be passed as an argument to the interrupt
+ * handler, but since this is difficult to implement in armv7/v8. Then,
+ * tprof_backend is exposed. Additionally, softc must be placed at the
+ * beginning of struct tprof_backend.
+ */
+ tprof_backend_softc_t tb_softc;
+
const char *tb_name;
const tprof_backend_ops_t *tb_ops;
LIST_ENTRY(tprof_backend) tb_list;
- int tb_usecount; /* S: */
} tprof_backend_t;
static kmutex_t tprof_lock;
-static bool tprof_running; /* s: */
static u_int tprof_nworker; /* L: # of running worker LWPs */
static lwp_t *tprof_owner;
static STAILQ_HEAD(, tprof_buf) tprof_list; /* L: global buffer list */
@@ -101,7 +112,7 @@ static struct workqueue *tprof_wq;
static struct percpu *tprof_cpus __read_mostly; /* tprof_cpu_t * */
static u_int tprof_samples_per_buf;
-static tprof_backend_t *tprof_backend; /* S: */
+tprof_backend_t *tprof_backend; /* S: */
static LIST_HEAD(, tprof_backend) tprof_backends =
LIST_HEAD_INITIALIZER(tprof_backend); /* S: */
@@ -193,6 +204,7 @@ tprof_worker(struct work *wk, void *dumm
{
tprof_cpu_t * const c = tprof_curcpu();
tprof_buf_t *buf;
+ tprof_backend_t *tb;
bool shouldstop;
KASSERT(wk == &c->c_work);
@@ -207,7 +219,8 @@ tprof_worker(struct work *wk, void *dumm
* and put it on the global list for read(2).
*/
mutex_enter(&tprof_lock);
- shouldstop = !tprof_running;
+ tb = tprof_backend;
+ shouldstop = (tb == NULL || tb->tb_softc.sc_ctr_running_mask == 0);
if (shouldstop) {
KASSERT(tprof_nworker > 0);
tprof_nworker--;
@@ -283,103 +296,352 @@ tprof_getinfo(struct tprof_info *info)
}
static int
-tprof_start(const tprof_param_t *param)
+tprof_getncounters(u_int *ncounters)
+{
+ tprof_backend_t *tb;
+
+ tb = tprof_backend;
+ if (tb == NULL)
+ return ENOENT;
+
+ *ncounters = tb->tb_ops->tbo_ncounters();
+ return 0;
+}
+
+static void
+tprof_start_cpu(void *arg1, void *arg2)
+{
+ tprof_backend_t *tb = arg1;
+ tprof_countermask_t runmask = (uintptr_t)arg2;
+
+ tb->tb_ops->tbo_start(runmask);
+}
+
+static void
+tprof_stop_cpu(void *arg1, void *arg2)
+{
+ tprof_backend_t *tb = arg1;
+ tprof_countermask_t stopmask = (uintptr_t)arg2;
+
+ tb->tb_ops->tbo_stop(stopmask);
+}
+
+static int
+tprof_start(tprof_countermask_t runmask)
{
CPU_INFO_ITERATOR cii;
struct cpu_info *ci;
- int error;
- uint64_t freq;
tprof_backend_t *tb;
+ uint64_t xc;
+ int error;
+ bool firstrun;
KASSERT(mutex_owned(&tprof_startstop_lock));
- if (tprof_running) {
- error = EBUSY;
- goto done;
- }
tb = tprof_backend;
if (tb == NULL) {
error = ENOENT;
goto done;
}
- if (tb->tb_usecount > 0) {
- error = EBUSY;
+
+ runmask &= ~tb->tb_softc.sc_ctr_running_mask;
+ runmask &= tb->tb_softc.sc_ctr_configured_mask;
+ if (runmask == 0) {
+ /*
+ * targets are already running.
+ * unconfigured counters are ignored.
+ */
+ error = 0;
goto done;
}
- tb->tb_usecount++;
- freq = tb->tb_ops->tbo_estimate_freq();
- tprof_samples_per_buf = MIN(freq * 2, TPROF_MAX_SAMPLES_PER_BUF);
-
- error = workqueue_create(&tprof_wq, "tprofmv", tprof_worker, NULL,
- PRI_NONE, IPL_SOFTCLOCK, WQ_MPSAFE | WQ_PERCPU);
- if (error != 0) {
- goto done;
+ firstrun = (tb->tb_softc.sc_ctr_running_mask == 0);
+ if (firstrun) {
+ if (tb->tb_ops->tbo_establish != NULL) {
+ error = tb->tb_ops->tbo_establish(&tb->tb_softc);
+ if (error != 0)
+ goto done;
+ }
+
+ tprof_samples_per_buf = TPROF_MAX_SAMPLES_PER_BUF;
+ error = workqueue_create(&tprof_wq, "tprofmv", tprof_worker,
+ NULL, PRI_NONE, IPL_SOFTCLOCK, WQ_MPSAFE | WQ_PERCPU);
+ if (error != 0) {
+ if (tb->tb_ops->tbo_disestablish != NULL)
+ tb->tb_ops->tbo_disestablish(&tb->tb_softc);
+ goto done;
+ }
+
+ for (CPU_INFO_FOREACH(cii, ci)) {
+ tprof_cpu_t * const c = tprof_cpu(ci);
+ tprof_buf_t *new;
+ tprof_buf_t *old;
+
+ new = tprof_buf_alloc();
+ old = tprof_buf_switch(c, new);
+ if (old != NULL) {
+ tprof_buf_free(old);
+ }
+ callout_init(&c->c_callout, CALLOUT_MPSAFE);
+ callout_setfunc(&c->c_callout, tprof_kick, ci);
+ }
}
- for (CPU_INFO_FOREACH(cii, ci)) {
- tprof_cpu_t * const c = tprof_cpu(ci);
- tprof_buf_t *new;
- tprof_buf_t *old;
+ runmask &= tb->tb_softc.sc_ctr_configured_mask;
+ xc = xc_broadcast(0, tprof_start_cpu, tb, (void *)(uintptr_t)runmask);
+ xc_wait(xc);
+ mutex_enter(&tprof_lock);
+ tb->tb_softc.sc_ctr_running_mask |= runmask;
+ mutex_exit(&tprof_lock);
- new = tprof_buf_alloc();
- old = tprof_buf_switch(c, new);
- if (old != NULL) {
- tprof_buf_free(old);
+ if (firstrun) {
+ for (CPU_INFO_FOREACH(cii, ci)) {
+ tprof_cpu_t * const c = tprof_cpu(ci);
+
+ mutex_enter(&tprof_lock);
+ tprof_nworker++;
+ mutex_exit(&tprof_lock);
+ workqueue_enqueue(tprof_wq, &c->c_work, ci);
}
- callout_init(&c->c_callout, CALLOUT_MPSAFE);
- callout_setfunc(&c->c_callout, tprof_kick, ci);
}
+done:
+ return error;
+}
- error = tb->tb_ops->tbo_start(param);
- if (error != 0) {
- KASSERT(tb->tb_usecount > 0);
- tb->tb_usecount--;
- tprof_stop1();
+static void
+tprof_stop(tprof_countermask_t stopmask)
+{
+ tprof_backend_t *tb;
+ uint64_t xc;
+
+ tb = tprof_backend;
+ if (tb == NULL)
+ return;
+
+ KASSERT(mutex_owned(&tprof_startstop_lock));
+ stopmask &= tb->tb_softc.sc_ctr_running_mask;
+ if (stopmask == 0) {
+ /* targets are not running */
goto done;
}
+ xc = xc_broadcast(0, tprof_stop_cpu, tb, (void *)(uintptr_t)stopmask);
+ xc_wait(xc);
mutex_enter(&tprof_lock);
- tprof_running = true;
+ tb->tb_softc.sc_ctr_running_mask &= ~stopmask;
mutex_exit(&tprof_lock);
- for (CPU_INFO_FOREACH(cii, ci)) {
- tprof_cpu_t * const c = tprof_cpu(ci);
+ /* all counters have stopped? */
+ if (tb->tb_softc.sc_ctr_running_mask == 0) {
mutex_enter(&tprof_lock);
- tprof_nworker++;
+ cv_broadcast(&tprof_reader_cv);
+ while (tprof_nworker > 0) {
+ cv_wait(&tprof_cv, &tprof_lock);
+ }
mutex_exit(&tprof_lock);
- workqueue_enqueue(tprof_wq, &c->c_work, ci);
+
+ tprof_stop1();
+ if (tb->tb_ops->tbo_disestablish != NULL)
+ tb->tb_ops->tbo_disestablish(&tb->tb_softc);
}
done:
- return error;
+ ;
}
static void
-tprof_stop(void)
+tprof_init_percpu_counters_offset(void *vp, void *vp2, struct cpu_info *ci)
+{
+ uint64_t *counters_offset = vp;
+ u_int counter = (uintptr_t)vp2;
+
+ tprof_backend_t *tb = tprof_backend;
+ tprof_param_t *param = &tb->tb_softc.sc_count[counter].ctr_param;
+ counters_offset[counter] = param->p_value;
+}
+
+static void
+tprof_configure_event_cpu(void *arg1, void *arg2)
+{
+ tprof_backend_t *tb = arg1;
+ u_int counter = (uintptr_t)arg2;
+ tprof_param_t *param = &tb->tb_softc.sc_count[counter].ctr_param;
+
+ tb->tb_ops->tbo_configure_event(counter, param);
+}
+
+static int
+tprof_configure_event(const tprof_param_t *param)
{
tprof_backend_t *tb;
+ tprof_backend_softc_t *sc;
+ tprof_param_t *sc_param;
+ uint64_t xc;
+ int c, error;
- KASSERT(mutex_owned(&tprof_startstop_lock));
- if (!tprof_running) {
+ if ((param->p_flags & (TPROF_PARAM_USER | TPROF_PARAM_KERN)) == 0) {
+ error = EINVAL;
goto done;
}
tb = tprof_backend;
- KASSERT(tb->tb_usecount > 0);
- tb->tb_ops->tbo_stop(NULL);
- tb->tb_usecount--;
+ if (tb == NULL) {
+ error = ENOENT;
+ goto done;
+ }
+ sc = &tb->tb_softc;
- mutex_enter(&tprof_lock);
- tprof_running = false;
- cv_broadcast(&tprof_reader_cv);
- while (tprof_nworker > 0) {
- cv_wait(&tprof_cv, &tprof_lock);
+ c = param->p_counter;
+ if (c >= tb->tb_softc.sc_ncounters) {
+ error = EINVAL;
+ goto done;
+ }
+
+ if (tb->tb_ops->tbo_valid_event != NULL) {
+ error = tb->tb_ops->tbo_valid_event(param->p_counter, param);
+ if (error != 0)
+ goto done;
+ }
+
+ /* if already running, stop the counter */
+ if (ISSET(c, tb->tb_softc.sc_ctr_running_mask))
+ tprof_stop(__BIT(c));
+
+ sc->sc_count[c].ctr_bitwidth =
+ tb->tb_ops->tbo_counter_bitwidth(param->p_counter);
+
+ sc_param = &sc->sc_count[c].ctr_param;
+ memcpy(sc_param, param, sizeof(*sc_param)); /* save copy of param */
+
+ if (ISSET(param->p_flags, TPROF_PARAM_PROFILE)) {
+ uint64_t freq, inum, dnum;
+
+ freq = tb->tb_ops->tbo_counter_estimate_freq(c);
+ sc->sc_count[c].ctr_counter_val = freq / TPROF_HZ;
+ if (sc->sc_count[c].ctr_counter_val == 0) {
+ printf("%s: counter#%d frequency (%"PRIu64") is"
+ " very low relative to TPROF_HZ (%u)\n", __func__,
+ c, freq, TPROF_HZ);
+ sc->sc_count[c].ctr_counter_val =
+ 4000000000ULL / TPROF_HZ;
+ }
+
+ switch (param->p_flags & TPROF_PARAM_VALUE2_MASK) {
+ case TPROF_PARAM_VALUE2_SCALE:
+ if (sc_param->p_value2 == 0)
+ break;
+ /*
+ * p_value2 is 64-bit fixed-point
+ * upper 32 bits are the integer part
+ * lower 32 bits are the decimal part
+ */
+ inum = sc_param->p_value2 >> 32;
+ dnum = sc_param->p_value2 & __BITS(31, 0);
+ sc->sc_count[c].ctr_counter_val =
+ sc->sc_count[c].ctr_counter_val * inum +
+ (sc->sc_count[c].ctr_counter_val * dnum >> 32);
+ if (sc->sc_count[c].ctr_counter_val == 0)
+ sc->sc_count[c].ctr_counter_val = 1;
+ break;
+ case TPROF_PARAM_VALUE2_TRIGGERCOUNT:
+ if (sc_param->p_value2 == 0)
+ sc_param->p_value2 = 1;
+ if (sc_param->p_value2 >
+ __BITS(sc->sc_count[c].ctr_bitwidth - 1, 0)) {
+ sc_param->p_value2 =
+ __BITS(sc->sc_count[c].ctr_bitwidth - 1, 0);
+ }
+ sc->sc_count[c].ctr_counter_val = sc_param->p_value2;
+ break;
+ default:
+ break;
+ }
+ sc->sc_count[c].ctr_counter_reset_val =
+ -sc->sc_count[c].ctr_counter_val;
+ sc->sc_count[c].ctr_counter_reset_val &=
+ __BITS(sc->sc_count[c].ctr_bitwidth - 1, 0);
+ } else {
+ sc->sc_count[c].ctr_counter_val = 0;
+ sc->sc_count[c].ctr_counter_reset_val = 0;
}
+
+ /* At this point, p_value is used as an initial value */
+ percpu_foreach(tb->tb_softc.sc_ctr_offset_percpu,
+ tprof_init_percpu_counters_offset, (void *)(uintptr_t)c);
+ /* On the backend side, p_value is used as the reset value */
+ sc_param->p_value = tb->tb_softc.sc_count[c].ctr_counter_reset_val;
+
+ xc = xc_broadcast(0, tprof_configure_event_cpu,
+ tb, (void *)(uintptr_t)c);
+ xc_wait(xc);
+
+ mutex_enter(&tprof_lock);
+ /* update counters bitmasks */
+ SET(tb->tb_softc.sc_ctr_configured_mask, __BIT(c));
+ CLR(tb->tb_softc.sc_ctr_prof_mask, __BIT(c));
+ CLR(tb->tb_softc.sc_ctr_ovf_mask, __BIT(c));
+ /* profiled counter requires overflow handling */
+ if (ISSET(param->p_flags, TPROF_PARAM_PROFILE)) {
+ SET(tb->tb_softc.sc_ctr_prof_mask, __BIT(c));
+ SET(tb->tb_softc.sc_ctr_ovf_mask, __BIT(c));
+ }
+ /* counters with less than 64bits also require overflow handling */
+ if (sc->sc_count[c].ctr_bitwidth != 64)
+ SET(tb->tb_softc.sc_ctr_ovf_mask, __BIT(c));
mutex_exit(&tprof_lock);
- tprof_stop1();
-done:
- ;
+ error = 0;
+
+ done:
+ return error;
+}
+
+static void
+tprof_getcounts_cpu(void *arg1, void *arg2)
+{
+ tprof_backend_t *tb = arg1;
+ tprof_backend_softc_t *sc = &tb->tb_softc;
+ uint64_t *counters = arg2;
+ uint64_t *counters_offset;
+ unsigned int c;
+
+ tprof_countermask_t configmask = sc->sc_ctr_configured_mask;
+ counters_offset = percpu_getref(sc->sc_ctr_offset_percpu);
+ for (c = 0; c < sc->sc_ncounters; c++) {
+ if (ISSET(configmask, __BIT(c))) {
+ uint64_t ctr = tb->tb_ops->tbo_counter_read(c);
+ counters[c] = counters_offset[c] +
+ ((ctr - sc->sc_count[c].ctr_counter_reset_val) &
+ __BITS(sc->sc_count[c].ctr_bitwidth - 1, 0));
+ } else {
+ counters[c] = 0;
+ }
+ }
+ percpu_putref(sc->sc_ctr_offset_percpu);
+}
+
+static int
+tprof_getcounts(tprof_counts_t *counts)
+{
+ struct cpu_info *ci;
+ tprof_backend_t *tb;
+ uint64_t xc;
+
+ tb = tprof_backend;
+ if (tb == NULL)
+ return ENOENT;
+
+ if (counts->c_cpu >= ncpu)
+ return ESRCH;
+ ci = cpu_lookup(counts->c_cpu);
+ if (ci == NULL)
+ return ESRCH;
+
+ xc = xc_unicast(0, tprof_getcounts_cpu, tb, counts->c_count, ci);
+ xc_wait(xc);
+
+ counts->c_ncounters = tb->tb_softc.sc_ncounters;
+ counts->c_runningmask = tb->tb_softc.sc_ctr_running_mask;
+ return 0;
}
/*
@@ -457,7 +719,8 @@ tprof_sample(void *unused, const tprof_f
sp->s_pid = l->l_proc->p_pid;
sp->s_lwpid = l->l_lid;
sp->s_cpuid = c->c_cpuid;
- sp->s_flags = (tfi->tfi_inkernel) ? TPROF_SAMPLE_INKERNEL : 0;
+ sp->s_flags = ((tfi->tfi_inkernel) ? TPROF_SAMPLE_INKERNEL : 0) |
+ __SHIFTIN(tfi->tfi_counter, TPROF_SAMPLE_COUNTER_MASK);
sp->s_pc = pc;
buf->b_used = idx + 1;
}
@@ -488,10 +751,9 @@ tprof_backend_register(const char *name,
return ENOTSUP;
}
#endif
- tb = kmem_alloc(sizeof(*tb), KM_SLEEP);
+ tb = kmem_zalloc(sizeof(*tb), KM_SLEEP);
tb->tb_name = name;
tb->tb_ops = ops;
- tb->tb_usecount = 0;
LIST_INSERT_HEAD(&tprof_backends, tb, tb_list);
#if 1 /* XXX for now */
if (tprof_backend == NULL) {
@@ -500,6 +762,13 @@ tprof_backend_register(const char *name,
#endif
mutex_exit(&tprof_startstop_lock);
+ /* init backend softc */
+ tb->tb_softc.sc_ncounters = tb->tb_ops->tbo_ncounters();
+ tb->tb_softc.sc_ctr_offset_percpu_size =
+ sizeof(uint64_t) * tb->tb_softc.sc_ncounters;
+ tb->tb_softc.sc_ctr_offset_percpu =
+ percpu_alloc(tb->tb_softc.sc_ctr_offset_percpu_size);
+
return 0;
}
@@ -520,7 +789,7 @@ tprof_backend_unregister(const char *nam
panic("%s: not found '%s'", __func__, name);
}
#endif /* defined(DIAGNOSTIC) */
- if (tb->tb_usecount > 0) {
+ if (tb->tb_softc.sc_ctr_running_mask != 0) {
mutex_exit(&tprof_startstop_lock);
return EBUSY;
}
@@ -532,6 +801,11 @@ tprof_backend_unregister(const char *nam
LIST_REMOVE(tb, tb_list);
mutex_exit(&tprof_startstop_lock);
+ /* fini backend softc */
+ percpu_free(tb->tb_softc.sc_ctr_offset_percpu,
+ tb->tb_softc.sc_ctr_offset_percpu_size);
+
+ /* free backend */
kmem_free(tb, sizeof(*tb));
return 0;
@@ -567,8 +841,17 @@ tprof_close(dev_t dev, int flags, int ty
mutex_enter(&tprof_lock);
tprof_owner = NULL;
mutex_exit(&tprof_lock);
- tprof_stop();
+ tprof_stop(TPROF_COUNTERMASK_ALL);
tprof_clear();
+
+ tprof_backend_t *tb = tprof_backend;
+ if (tb != NULL) {
+ KASSERT(tb->tb_softc.sc_ctr_running_mask == 0);
+ tb->tb_softc.sc_ctr_configured_mask = 0;
+ tb->tb_softc.sc_ctr_prof_mask = 0;
+ tb->tb_softc.sc_ctr_ovf_mask = 0;
+ }
+
mutex_exit(&tprof_startstop_lock);
return 0;
@@ -644,6 +927,7 @@ static int
tprof_ioctl(dev_t dev, u_long cmd, void *data, int flags, struct lwp *l)
{
const tprof_param_t *param;
+ tprof_counts_t *counts;
int error = 0;
KASSERT(minor(dev) == 0);
@@ -654,15 +938,19 @@ tprof_ioctl(dev_t dev, u_long cmd, void
tprof_getinfo(data);
mutex_exit(&tprof_startstop_lock);
break;
+ case TPROF_IOC_GETNCOUNTERS:
+ mutex_enter(&tprof_lock);
+ error = tprof_getncounters((u_int *)data);
+ mutex_exit(&tprof_lock);
+ break;
case TPROF_IOC_START:
- param = data;
mutex_enter(&tprof_startstop_lock);
- error = tprof_start(param);
+ error = tprof_start(*(tprof_countermask_t *)data);
mutex_exit(&tprof_startstop_lock);
break;
case TPROF_IOC_STOP:
mutex_enter(&tprof_startstop_lock);
- tprof_stop();
+ tprof_stop(*(tprof_countermask_t *)data);
mutex_exit(&tprof_startstop_lock);
break;
case TPROF_IOC_GETSTAT:
@@ -670,6 +958,18 @@ tprof_ioctl(dev_t dev, u_long cmd, void
memcpy(data, &tprof_stat, sizeof(tprof_stat));
mutex_exit(&tprof_lock);
break;
+ case TPROF_IOC_CONFIGURE_EVENT:
+ param = data;
+ mutex_enter(&tprof_startstop_lock);
+ error = tprof_configure_event(param);
+ mutex_exit(&tprof_startstop_lock);
+ break;
+ case TPROF_IOC_GETCOUNTS:
+ counts = data;
+ mutex_enter(&tprof_startstop_lock);
+ error = tprof_getcounts(counts);
+ mutex_exit(&tprof_startstop_lock);
+ break;
default:
error = EINVAL;
break;
Index: src/sys/dev/tprof/tprof.h
diff -u src/sys/dev/tprof/tprof.h:1.6 src/sys/dev/tprof/tprof.h:1.7
--- src/sys/dev/tprof/tprof.h:1.6 Fri Jul 13 07:56:29 2018
+++ src/sys/dev/tprof/tprof.h Thu Dec 1 00:32:52 2022
@@ -1,4 +1,4 @@
-/* $NetBSD: tprof.h,v 1.6 2018/07/13 07:56:29 maxv Exp $ */
+/* $NetBSD: tprof.h,v 1.7 2022/12/01 00:32:52 ryo Exp $ */
/*-
* Copyright (c)2008,2009,2010 YAMAMOTO Takashi,
@@ -37,19 +37,45 @@
#include <dev/tprof/tprof_types.h>
+struct tprof_backend_softc_counter {
+ tprof_param_t ctr_param;
+ u_int ctr_bitwidth;
+ uint64_t ctr_counter_val;
+ uint64_t ctr_counter_reset_val;
+};
+
+typedef struct tprof_backend_softc {
+ u_int sc_ncounters;
+ tprof_countermask_t sc_ctr_running_mask;/* start/stop */
+ tprof_countermask_t sc_ctr_configured_mask; /* configured */
+ tprof_countermask_t sc_ctr_ovf_mask; /* overflow intr required */
+ tprof_countermask_t sc_ctr_prof_mask; /* profiled */
+ percpu_t *sc_ctr_offset_percpu;
+ size_t sc_ctr_offset_percpu_size;
+ struct tprof_backend_softc_counter sc_count[TPROF_MAXCOUNTERS];
+} tprof_backend_softc_t;
+
typedef struct tprof_backend_ops {
- uint64_t (*tbo_estimate_freq)(void); /* samples per second */
uint32_t (*tbo_ident)(void);
- int (*tbo_start)(const tprof_param_t *);
- void (*tbo_stop)(const tprof_param_t *);
+ u_int (*tbo_ncounters)(void);
+ u_int (*tbo_counter_bitwidth)(u_int);
+ uint64_t (*tbo_counter_read)(u_int);
+ uint64_t (*tbo_counter_estimate_freq)(u_int);
+ int (*tbo_valid_event)(u_int, const tprof_param_t *);
+ void (*tbo_configure_event)(u_int, const tprof_param_t *);
+ void (*tbo_start)(tprof_countermask_t);
+ void (*tbo_stop)(tprof_countermask_t);
+ int (*tbo_establish)(tprof_backend_softc_t *);
+ void (*tbo_disestablish)(tprof_backend_softc_t *);
} tprof_backend_ops_t;
-#define TPROF_BACKEND_VERSION 3
+#define TPROF_BACKEND_VERSION 4
int tprof_backend_register(const char *, const tprof_backend_ops_t *, int);
int tprof_backend_unregister(const char *);
typedef struct {
uintptr_t tfi_pc; /* program counter */
+ u_int tfi_counter; /* counter. 0..(TPROF_MAXCOUNTERS-1) */
bool tfi_inkernel; /* if tfi_pc is in the kernel address space */
} tprof_frame_info_t;
Index: src/sys/dev/tprof/tprof_armv7.c
diff -u src/sys/dev/tprof/tprof_armv7.c:1.9 src/sys/dev/tprof/tprof_armv7.c:1.10
--- src/sys/dev/tprof/tprof_armv7.c:1.9 Thu Dec 1 00:29:51 2022
+++ src/sys/dev/tprof/tprof_armv7.c Thu Dec 1 00:32:52 2022
@@ -1,4 +1,4 @@
-/* $NetBSD: tprof_armv7.c,v 1.9 2022/12/01 00:29:51 ryo Exp $ */
+/* $NetBSD: tprof_armv7.c,v 1.10 2022/12/01 00:32:52 ryo Exp $ */
/*-
* Copyright (c) 2018 Jared McNeill <[email protected]>
@@ -27,11 +27,12 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: tprof_armv7.c,v 1.9 2022/12/01 00:29:51 ryo Exp $");
+__KERNEL_RCSID(0, "$NetBSD: tprof_armv7.c,v 1.10 2022/12/01 00:32:52 ryo Exp $");
#include <sys/param.h>
#include <sys/bus.h>
#include <sys/cpu.h>
+#include <sys/percpu.h>
#include <sys/xcall.h>
#include <dev/tprof/tprof.h>
@@ -50,15 +51,13 @@ __KERNEL_RCSID(0, "$NetBSD: tprof_armv7.
#define PMCNTEN_C __BIT(31)
#define PMCNTEN_P __BITS(30,0)
+#define PMOVS_C __BIT(31)
+#define PMOVS_P __BITS(30,0)
+
#define PMEVTYPER_P __BIT(31)
#define PMEVTYPER_U __BIT(30)
#define PMEVTYPER_EVTCOUNT __BITS(7,0)
-static tprof_param_t armv7_pmu_param;
-static const u_int armv7_pmu_counter = 1;
-static uint32_t counter_val;
-static uint32_t counter_reset_val;
-
static uint16_t cortexa9_events[] = {
0x40, 0x41, 0x42,
0x50, 0x51,
@@ -118,7 +117,7 @@ armv7_pmu_set_pmevtyper(u_int counter, u
armreg_pmxevtyper_write(val);
}
-static void
+static inline void
armv7_pmu_set_pmevcntr(u_int counter, uint32_t val)
{
armreg_pmselr_write(counter);
@@ -126,138 +125,175 @@ armv7_pmu_set_pmevcntr(u_int counter, ui
armreg_pmxevcntr_write(val);
}
-static void
-armv7_pmu_start_cpu(void *arg1, void *arg2)
+static inline uint64_t
+armv7_pmu_get_pmevcntr(u_int counter)
{
- const uint32_t counter_mask = __BIT(armv7_pmu_counter);
- uint64_t pmcr, pmevtyper;
-
- /* Enable performance monitor */
- pmcr = armreg_pmcr_read();
- pmcr |= PMCR_E;
- armreg_pmcr_write(pmcr);
-
- /* Disable event counter */
- armreg_pmcntenclr_write(counter_mask);
-
- /* Configure event counter */
- pmevtyper = __SHIFTIN(armv7_pmu_param.p_event, PMEVTYPER_EVTCOUNT);
- if (!ISSET(armv7_pmu_param.p_flags, TPROF_PARAM_USER))
- pmevtyper |= PMEVTYPER_U;
- if (!ISSET(armv7_pmu_param.p_flags, TPROF_PARAM_KERN))
- pmevtyper |= PMEVTYPER_P;
-
- armv7_pmu_set_pmevtyper(armv7_pmu_counter, pmevtyper);
-
- /* Enable overflow interrupts */
- armreg_pmintenset_write(counter_mask);
-
- /* Clear overflow flag */
- armreg_pmovsr_write(counter_mask);
+ armreg_pmselr_write(counter);
+ isb();
+ return armreg_pmxevcntr_read();
+}
- /* Initialize event counter value */
- armv7_pmu_set_pmevcntr(armv7_pmu_counter, counter_reset_val);
+/* read and write at once */
+static inline uint64_t
+armv7_pmu_getset_pmevcntr(u_int counter, uint64_t val)
+{
+ uint64_t c;
- /* Enable event counter */
- armreg_pmcntenset_write(counter_mask);
+ armreg_pmselr_write(counter);
+ isb();
+ c = armreg_pmxevcntr_read();
+ armreg_pmxevcntr_write(val);
+ return c;
}
-static void
-armv7_pmu_stop_cpu(void *arg1, void *arg2)
+static uint32_t
+armv7_pmu_ncounters(void)
{
- const uint32_t counter_mask = __BIT(armv7_pmu_counter);
-
- /* Disable overflow interrupts */
- armreg_pmintenclr_write(counter_mask);
+ return __SHIFTOUT(armreg_pmcr_read(), PMCR_N);
+}
- /* Disable event counter */
- armreg_pmcntenclr_write(counter_mask);
+static u_int
+armv7_pmu_counter_bitwidth(u_int counter)
+{
+ return 32;
}
static uint64_t
-armv7_pmu_estimate_freq(void)
+armv7_pmu_counter_estimate_freq(u_int counter)
{
uint64_t cpufreq = curcpu()->ci_data.cpu_cc_freq;
- uint64_t freq = 10000;
- uint32_t pmcr;
-
- counter_val = cpufreq / freq;
- if (counter_val == 0)
- counter_val = 4000000000ULL / freq;
-
- pmcr = armreg_pmcr_read();
- if (pmcr & PMCR_D)
- counter_val /= 64;
- return freq;
-}
-
-static uint32_t
-armv7_pmu_ident(void)
-{
- return TPROF_IDENT_ARMV7_GENERIC;
+ if (ISSET(armreg_pmcr_read(), PMCR_D))
+ cpufreq /= 64;
+ return cpufreq;
}
static int
-armv7_pmu_start(const tprof_param_t *param)
+armv7_pmu_valid_event(u_int counter, const tprof_param_t *param)
{
- /* PMCR.N of 0 means that no event counters are available */
- if (__SHIFTOUT(armreg_pmcr_read(), PMCR_N) == 0) {
- return EINVAL;
- }
-
if (!armv7_pmu_event_implemented(param->p_event)) {
- printf("%s: event %#llx not implemented on this CPU\n",
+ printf("%s: event %#" PRIx64 " not implemented on this CPU\n",
__func__, param->p_event);
return EINVAL;
}
+ return 0;
+}
- counter_reset_val = -counter_val + 1;
+static void
+armv7_pmu_configure_event(u_int counter, const tprof_param_t *param)
+{
+ /* Disable event counter */
+ armreg_pmcntenclr_write(__BIT(counter) & PMCNTEN_P);
- armv7_pmu_param = *param;
- uint64_t xc = xc_broadcast(0, armv7_pmu_start_cpu, NULL, NULL);
- xc_wait(xc);
+ /* Disable overflow interrupts */
+ armreg_pmintenclr_write(__BIT(counter) & PMINTEN_P);
- return 0;
+ /* Configure event counter */
+ uint32_t pmevtyper = __SHIFTIN(param->p_event, PMEVTYPER_EVTCOUNT);
+ if (!ISSET(param->p_flags, TPROF_PARAM_USER))
+ pmevtyper |= PMEVTYPER_U;
+ if (!ISSET(param->p_flags, TPROF_PARAM_KERN))
+ pmevtyper |= PMEVTYPER_P;
+ armv7_pmu_set_pmevtyper(counter, pmevtyper);
+
+ /*
+ * Enable overflow interrupts.
+ * Whether profiled or not, the counter width of armv7 is 32 bits,
+ * so overflow handling is required anyway.
+ */
+ armreg_pmintenset_write(__BIT(counter) & PMINTEN_P);
+
+ /* Clear overflow flag */
+ armreg_pmovsr_write(__BIT(counter) & PMOVS_P);
+
+ /* reset the counter */
+ armv7_pmu_set_pmevcntr(counter, param->p_value);
}
static void
-armv7_pmu_stop(const tprof_param_t *param)
+armv7_pmu_start(tprof_countermask_t runmask)
{
- uint64_t xc;
+ /* Enable event counters */
+ armreg_pmcntenset_write(runmask & PMCNTEN_P);
- xc = xc_broadcast(0, armv7_pmu_stop_cpu, NULL, NULL);
- xc_wait(xc);
+ /*
+ * PMCR.E is shared with PMCCNTR and event counters.
+ * It is set here in case PMCCNTR is not used in the system.
+ */
+ armreg_pmcr_write(armreg_pmcr_read() | PMCR_E);
}
-static const tprof_backend_ops_t tprof_armv7_pmu_ops = {
- .tbo_estimate_freq = armv7_pmu_estimate_freq,
- .tbo_ident = armv7_pmu_ident,
- .tbo_start = armv7_pmu_start,
- .tbo_stop = armv7_pmu_stop,
-};
+static void
+armv7_pmu_stop(tprof_countermask_t stopmask)
+{
+ /* Disable event counter */
+ armreg_pmcntenclr_write(stopmask & PMCNTEN_P);
+}
+
+/* XXX: argument of armv8_pmu_intr() */
+extern struct tprof_backend *tprof_backend;
+static void *pmu_intr_arg;
int
armv7_pmu_intr(void *priv)
{
const struct trapframe * const tf = priv;
- const uint32_t counter_mask = __BIT(armv7_pmu_counter);
+ tprof_backend_softc_t *sc = pmu_intr_arg;
tprof_frame_info_t tfi;
+ int bit;
+ const uint32_t pmovs = armreg_pmovsr_read() & PMOVS_P;
- const uint32_t pmovsr = armreg_pmovsr_read();
- if ((pmovsr & counter_mask) != 0) {
- tfi.tfi_pc = tf->tf_pc;
- tfi.tfi_inkernel = tfi.tfi_pc >= VM_MIN_KERNEL_ADDRESS &&
- tfi.tfi_pc < VM_MAX_KERNEL_ADDRESS;
- tprof_sample(NULL, &tfi);
-
- armv7_pmu_set_pmevcntr(armv7_pmu_counter, counter_reset_val);
+ uint64_t *counters_offset =
+ percpu_getptr_remote(sc->sc_ctr_offset_percpu, curcpu());
+ uint32_t mask = pmovs;
+ while ((bit = ffs(mask)) != 0) {
+ bit--;
+ CLR(mask, __BIT(bit));
+
+ if (ISSET(sc->sc_ctr_prof_mask, __BIT(bit))) {
+ /* account for the counter, and reset */
+ uint64_t ctr = armv7_pmu_getset_pmevcntr(bit,
+ sc->sc_count[bit].ctr_counter_reset_val);
+ counters_offset[bit] +=
+ sc->sc_count[bit].ctr_counter_val + ctr;
+
+ /* record a sample */
+ tfi.tfi_pc = tf->tf_pc;
+ tfi.tfi_counter = bit;
+ tfi.tfi_inkernel =
+ tfi.tfi_pc >= VM_MIN_KERNEL_ADDRESS &&
+ tfi.tfi_pc < VM_MAX_KERNEL_ADDRESS;
+ tprof_sample(NULL, &tfi);
+ } else {
+ /* counter has overflowed */
+ counters_offset[bit] += __BIT(32);
+ }
}
- armreg_pmovsr_write(pmovsr);
+ armreg_pmovsr_write(pmovs);
return 1;
}
+static uint32_t
+armv7_pmu_ident(void)
+{
+ return TPROF_IDENT_ARMV7_GENERIC;
+}
+
+static const tprof_backend_ops_t tprof_armv7_pmu_ops = {
+ .tbo_ident = armv7_pmu_ident,
+ .tbo_ncounters = armv7_pmu_ncounters,
+ .tbo_counter_bitwidth = armv7_pmu_counter_bitwidth,
+ .tbo_counter_read = armv7_pmu_get_pmevcntr,
+ .tbo_counter_estimate_freq = armv7_pmu_counter_estimate_freq,
+ .tbo_valid_event = armv7_pmu_valid_event,
+ .tbo_configure_event = armv7_pmu_configure_event,
+ .tbo_start = armv7_pmu_start,
+ .tbo_stop = armv7_pmu_stop,
+ .tbo_establish = NULL,
+ .tbo_disestablish = NULL,
+};
+
static void
armv7_pmu_init_cpu(void *arg1, void *arg2)
{
@@ -274,9 +310,21 @@ armv7_pmu_init_cpu(void *arg1, void *arg
int
armv7_pmu_init(void)
{
+ int error, ncounters;
+
+ ncounters = armv7_pmu_ncounters();
+ if (ncounters == 0)
+ return ENOTSUP;
+
uint64_t xc = xc_broadcast(0, armv7_pmu_init_cpu, NULL, NULL);
xc_wait(xc);
- return tprof_backend_register("tprof_armv7", &tprof_armv7_pmu_ops,
+ error = tprof_backend_register("tprof_armv7", &tprof_armv7_pmu_ops,
TPROF_BACKEND_VERSION);
+ if (error == 0) {
+ /* XXX: for argument of armv7_pmu_intr() */
+ pmu_intr_arg = tprof_backend;
+ }
+
+ return error;
}
Index: src/sys/dev/tprof/tprof_armv8.c
diff -u src/sys/dev/tprof/tprof_armv8.c:1.17 src/sys/dev/tprof/tprof_armv8.c:1.18
--- src/sys/dev/tprof/tprof_armv8.c:1.17 Thu Dec 1 00:29:10 2022
+++ src/sys/dev/tprof/tprof_armv8.c Thu Dec 1 00:32:52 2022
@@ -1,4 +1,4 @@
-/* $NetBSD: tprof_armv8.c,v 1.17 2022/12/01 00:29:10 ryo Exp $ */
+/* $NetBSD: tprof_armv8.c,v 1.18 2022/12/01 00:32:52 ryo Exp $ */
/*-
* Copyright (c) 2018 Jared McNeill <[email protected]>
@@ -27,11 +27,12 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: tprof_armv8.c,v 1.17 2022/12/01 00:29:10 ryo Exp $");
+__KERNEL_RCSID(0, "$NetBSD: tprof_armv8.c,v 1.18 2022/12/01 00:32:52 ryo Exp $");
#include <sys/param.h>
#include <sys/bus.h>
#include <sys/cpu.h>
+#include <sys/percpu.h>
#include <sys/xcall.h>
#include <dev/tprof/tprof.h>
@@ -41,10 +42,12 @@ __KERNEL_RCSID(0, "$NetBSD: tprof_armv8.
#include <dev/tprof/tprof_armv8.h>
-static tprof_param_t armv8_pmu_param;
-static const u_int armv8_pmu_counter = 0;
-static uint32_t counter_val;
-static uint32_t counter_reset_val;
+static u_int counter_bitwidth;
+
+/*
+ * armv8 can handle up to 31 event counters;
+ * PMCR_EL0.N counters are actually available.
+ */
static bool
armv8_pmu_event_implemented(uint16_t event)
@@ -75,137 +78,178 @@ armv8_pmu_set_pmevtyper(u_int counter, u
reg_pmxevtyper_el0_write(val);
}
-static void
-armv8_pmu_set_pmevcntr(u_int counter, uint32_t val)
+static inline void
+armv8_pmu_set_pmevcntr(u_int counter, uint64_t val)
{
reg_pmselr_el0_write(counter);
isb();
reg_pmxevcntr_el0_write(val);
}
-static void
-armv8_pmu_start_cpu(void *arg1, void *arg2)
+static inline uint64_t
+armv8_pmu_get_pmevcntr(u_int counter)
{
- const uint32_t counter_mask = __BIT(armv8_pmu_counter);
- uint64_t pmevtyper;
-
- /* Disable event counter */
- reg_pmcntenclr_el0_write(counter_mask);
-
- /* Configure event counter */
- pmevtyper = __SHIFTIN(armv8_pmu_param.p_event, PMEVTYPER_EVTCOUNT);
- if (!ISSET(armv8_pmu_param.p_flags, TPROF_PARAM_USER))
- pmevtyper |= PMEVTYPER_U;
- if (!ISSET(armv8_pmu_param.p_flags, TPROF_PARAM_KERN))
- pmevtyper |= PMEVTYPER_P;
-
- armv8_pmu_set_pmevtyper(armv8_pmu_counter, pmevtyper);
-
- /* Enable overflow interrupts */
- reg_pmintenset_el1_write(counter_mask);
-
- /* Clear overflow flag */
- reg_pmovsclr_el0_write(counter_mask);
-
- /* Initialize event counter value */
- armv8_pmu_set_pmevcntr(armv8_pmu_counter, counter_reset_val);
-
- /* Enable event counter */
- reg_pmcntenset_el0_write(counter_mask);
- reg_pmcr_el0_write(reg_pmcr_el0_read() | PMCR_E);
+ reg_pmselr_el0_write(counter);
+ isb();
+ return reg_pmxevcntr_el0_read();
}
-static void
-armv8_pmu_stop_cpu(void *arg1, void *arg2)
+/* read and write at once */
+static inline uint64_t
+armv8_pmu_getset_pmevcntr(u_int counter, uint64_t val)
{
- const uint32_t counter_mask = __BIT(armv8_pmu_counter);
-
- /* Disable overflow interrupts */
- reg_pmintenclr_el1_write(counter_mask);
+ uint64_t c;
- /* Disable event counter */
- reg_pmcntenclr_el0_write(counter_mask);
+ reg_pmselr_el0_write(counter);
+ isb();
+ c = reg_pmxevcntr_el0_read();
+ reg_pmxevcntr_el0_write(val);
+ return c;
}
-static uint64_t
-armv8_pmu_estimate_freq(void)
+static uint32_t
+armv8_pmu_ncounters(void)
{
- uint64_t cpufreq = curcpu()->ci_data.cpu_cc_freq;
- uint64_t freq = 10000;
-
- counter_val = cpufreq / freq;
- if (counter_val == 0)
- counter_val = 4000000000ULL / freq;
+ return __SHIFTOUT(reg_pmcr_el0_read(), PMCR_N);
+}
- return freq;
+static u_int
+armv8_pmu_counter_bitwidth(u_int counter)
+{
+ return counter_bitwidth;
}
-static uint32_t
-armv8_pmu_ident(void)
+static uint64_t
+armv8_pmu_counter_estimate_freq(u_int counter)
{
- return TPROF_IDENT_ARMV8_GENERIC;
+ return curcpu()->ci_data.cpu_cc_freq;
}
static int
-armv8_pmu_start(const tprof_param_t *param)
+armv8_pmu_valid_event(u_int counter, const tprof_param_t *param)
{
- /* PMCR.N of 0 means that no event counters are available */
- if (__SHIFTOUT(reg_pmcr_el0_read(), PMCR_N) == 0) {
- return EINVAL;
- }
-
if (!armv8_pmu_event_implemented(param->p_event)) {
printf("%s: event %#" PRIx64 " not implemented on this CPU\n",
__func__, param->p_event);
return EINVAL;
}
+ return 0;
+}
+
+static void
+armv8_pmu_configure_event(u_int counter, const tprof_param_t *param)
+{
+ /* Disable event counter */
+ reg_pmcntenclr_el0_write(__BIT(counter) & PMCNTEN_P);
- counter_reset_val = -counter_val + 1;
+ /* Disable overflow interrupts */
+ reg_pmintenclr_el1_write(__BIT(counter) & PMINTEN_P);
- armv8_pmu_param = *param;
- uint64_t xc = xc_broadcast(0, armv8_pmu_start_cpu, NULL, NULL);
- xc_wait(xc);
+ /* Configure event counter */
+ uint64_t pmevtyper = __SHIFTIN(param->p_event, PMEVTYPER_EVTCOUNT);
+ if (!ISSET(param->p_flags, TPROF_PARAM_USER))
+ pmevtyper |= PMEVTYPER_U;
+ if (!ISSET(param->p_flags, TPROF_PARAM_KERN))
+ pmevtyper |= PMEVTYPER_P;
+ armv8_pmu_set_pmevtyper(counter, pmevtyper);
- return 0;
+ if (ISSET(param->p_flags, TPROF_PARAM_PROFILE) ||
+ counter_bitwidth != 64) {
+ /* Enable overflow interrupts */
+ reg_pmintenset_el1_write(__BIT(counter) & PMINTEN_P);
+ }
+
+ /* Clear overflow flag */
+ reg_pmovsclr_el0_write(__BIT(counter) & PMOVS_P);
+
+ /* reset the counter */
+ armv8_pmu_set_pmevcntr(counter, param->p_value);
}
static void
-armv8_pmu_stop(const tprof_param_t *param)
+armv8_pmu_start(tprof_countermask_t runmask)
{
- uint64_t xc;
+ /* Enable event counters */
+ reg_pmcntenset_el0_write(runmask & PMCNTEN_P);
- xc = xc_broadcast(0, armv8_pmu_stop_cpu, NULL, NULL);
- xc_wait(xc);
+ /*
+ * PMCR.E is shared with PMCCNTR_EL0 and event counters.
+ * It is set here in case PMCCNTR_EL0 is not used in the system.
+ */
+ reg_pmcr_el0_write(reg_pmcr_el0_read() | PMCR_E);
}
-static const tprof_backend_ops_t tprof_armv8_pmu_ops = {
- .tbo_estimate_freq = armv8_pmu_estimate_freq,
- .tbo_ident = armv8_pmu_ident,
- .tbo_start = armv8_pmu_start,
- .tbo_stop = armv8_pmu_stop,
-};
+static void
+armv8_pmu_stop(tprof_countermask_t stopmask)
+{
+ /* Disable event counters */
+ reg_pmcntenclr_el0_write(stopmask & PMCNTEN_P);
+}
+
+/* XXX: argument of armv8_pmu_intr() */
+extern struct tprof_backend *tprof_backend;
+static void *pmu_intr_arg;
int
armv8_pmu_intr(void *priv)
{
const struct trapframe * const tf = priv;
- const uint32_t counter_mask = __BIT(armv8_pmu_counter);
+ tprof_backend_softc_t *sc = pmu_intr_arg;
tprof_frame_info_t tfi;
+ int bit;
+ const uint32_t pmovs = reg_pmovsset_el0_read() & PMOVS_P;
- const uint32_t pmovs = reg_pmovsset_el0_read();
- if ((pmovs & counter_mask) != 0) {
- tfi.tfi_pc = tf->tf_pc;
- tfi.tfi_inkernel = tfi.tfi_pc >= VM_MIN_KERNEL_ADDRESS &&
- tfi.tfi_pc < VM_MAX_KERNEL_ADDRESS;
- tprof_sample(NULL, &tfi);
-
- armv8_pmu_set_pmevcntr(armv8_pmu_counter, counter_reset_val);
+ uint64_t *counters_offset =
+ percpu_getptr_remote(sc->sc_ctr_offset_percpu, curcpu());
+ uint32_t mask = pmovs;
+ while ((bit = ffs(mask)) != 0) {
+ bit--;
+ CLR(mask, __BIT(bit));
+
+ if (ISSET(sc->sc_ctr_prof_mask, __BIT(bit))) {
+ /* account for the counter, and reset */
+ uint64_t ctr = armv8_pmu_getset_pmevcntr(bit,
+ sc->sc_count[bit].ctr_counter_reset_val);
+ counters_offset[bit] +=
+ sc->sc_count[bit].ctr_counter_val + ctr;
+
+ /* record a sample */
+ tfi.tfi_pc = tf->tf_pc;
+ tfi.tfi_counter = bit;
+ tfi.tfi_inkernel =
+ tfi.tfi_pc >= VM_MIN_KERNEL_ADDRESS &&
+ tfi.tfi_pc < VM_MAX_KERNEL_ADDRESS;
+ tprof_sample(NULL, &tfi);
+ } else {
+ /* counter has overflowed */
+ counters_offset[bit] += __BIT(32);
+ }
}
reg_pmovsclr_el0_write(pmovs);
return 1;
}
+static uint32_t
+armv8_pmu_ident(void)
+{
+ return TPROF_IDENT_ARMV8_GENERIC;
+}
+
+static const tprof_backend_ops_t tprof_armv8_pmu_ops = {
+ .tbo_ident = armv8_pmu_ident,
+ .tbo_ncounters = armv8_pmu_ncounters,
+ .tbo_counter_bitwidth = armv8_pmu_counter_bitwidth,
+ .tbo_counter_read = armv8_pmu_get_pmevcntr,
+ .tbo_counter_estimate_freq = armv8_pmu_counter_estimate_freq,
+ .tbo_valid_event = armv8_pmu_valid_event,
+ .tbo_configure_event = armv8_pmu_configure_event,
+ .tbo_start = armv8_pmu_start,
+ .tbo_stop = armv8_pmu_stop,
+ .tbo_establish = NULL,
+ .tbo_disestablish = NULL,
+};
+
static void
armv8_pmu_init_cpu(void *arg1, void *arg2)
{
@@ -232,11 +276,32 @@ armv8_pmu_detect(void)
int
armv8_pmu_init(void)
{
+ int error, ncounters;
+
KASSERT(armv8_pmu_detect());
+ ncounters = armv8_pmu_ncounters();
+ if (ncounters == 0)
+ return ENOTSUP;
+
+ /* Is 64bit event counter available? */
+ const uint64_t dfr0 = reg_id_aa64dfr0_el1_read();
+ const u_int pmuver = __SHIFTOUT(dfr0, ID_AA64DFR0_EL1_PMUVER);
+ if (pmuver >= ID_AA64DFR0_EL1_PMUVER_V3P5 &&
+ ISSET(reg_pmcr_el0_read(), PMCR_LP))
+ counter_bitwidth = 64;
+ else
+ counter_bitwidth = 32;
+
uint64_t xc = xc_broadcast(0, armv8_pmu_init_cpu, NULL, NULL);
xc_wait(xc);
- return tprof_backend_register("tprof_armv8", &tprof_armv8_pmu_ops,
+ error = tprof_backend_register("tprof_armv8", &tprof_armv8_pmu_ops,
TPROF_BACKEND_VERSION);
+ if (error == 0) {
+ /* XXX: for argument of armv8_pmu_intr() */
+ pmu_intr_arg = tprof_backend;
+ }
+
+ return error;
}
Index: src/sys/dev/tprof/tprof_ioctl.h
diff -u src/sys/dev/tprof/tprof_ioctl.h:1.4 src/sys/dev/tprof/tprof_ioctl.h:1.5
--- src/sys/dev/tprof/tprof_ioctl.h:1.4 Fri Jul 13 07:56:29 2018
+++ src/sys/dev/tprof/tprof_ioctl.h Thu Dec 1 00:32:52 2022
@@ -1,4 +1,4 @@
-/* $NetBSD: tprof_ioctl.h,v 1.4 2018/07/13 07:56:29 maxv Exp $ */
+/* $NetBSD: tprof_ioctl.h,v 1.5 2022/12/01 00:32:52 ryo Exp $ */
/*-
* Copyright (c)2008,2010 YAMAMOTO Takashi,
@@ -37,17 +37,12 @@
#include <dev/tprof/tprof_types.h>
-#define TPROF_VERSION 4 /* kernel-userland ABI version */
+#define TPROF_VERSION 5 /* kernel-userland ABI version */
struct tprof_info {
uint32_t ti_version;
uint32_t ti_ident;
};
-#define TPROF_IOC_GETINFO _IOR('T', 1, struct tprof_info)
-
-#define TPROF_IOC_START _IOW('T', 2, tprof_param_t)
-
-#define TPROF_IOC_STOP _IO('T', 3)
struct tprof_stat {
uint64_t ts_sample; /* samples successfully recorded */
@@ -57,6 +52,13 @@ struct tprof_stat {
uint64_t ts_dropbuf; /* buffers dropped due to the global limit */
uint64_t ts_dropbuf_sample; /* samples dropped with ts_dropbuf */
};
-#define TPROF_IOC_GETSTAT _IOR('T', 4, struct tprof_stat)
+
+#define TPROF_IOC_GETINFO _IOR('T', 1, struct tprof_info)
+#define TPROF_IOC_START _IOW('T', 2, tprof_countermask_t)
+#define TPROF_IOC_STOP _IOW('T', 3, tprof_countermask_t)
+#define TPROF_IOC_GETSTAT _IOR('T', 4, struct tprof_stat)
+#define TPROF_IOC_GETNCOUNTERS _IOR('T', 5, u_int)
+#define TPROF_IOC_CONFIGURE_EVENT _IOW('T', 6, tprof_param_t)
+#define TPROF_IOC_GETCOUNTS _IOWR('T', 7, tprof_counts_t)
#endif /* _DEV_TPROF_TPROF_IOCTL_H_ */
Index: src/sys/dev/tprof/tprof_x86_intel.c
diff -u src/sys/dev/tprof/tprof_x86_intel.c:1.4 src/sys/dev/tprof/tprof_x86_intel.c:1.5
--- src/sys/dev/tprof/tprof_x86_intel.c:1.4 Thu May 26 13:02:04 2022
+++ src/sys/dev/tprof/tprof_x86_intel.c Thu Dec 1 00:32:52 2022
@@ -1,4 +1,4 @@
-/* $NetBSD: tprof_x86_intel.c,v 1.4 2022/05/26 13:02:04 msaitoh Exp $ */
+/* $NetBSD: tprof_x86_intel.c,v 1.5 2022/12/01 00:32:52 ryo Exp $ */
/*
* Copyright (c) 2018 The NetBSD Foundation, Inc.
@@ -56,15 +56,15 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: tprof_x86_intel.c,v 1.4 2022/05/26 13:02:04 msaitoh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: tprof_x86_intel.c,v 1.5 2022/12/01 00:32:52 ryo Exp $");
#include <sys/param.h>
#include <sys/systm.h>
-#include <sys/device.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/cpu.h>
+#include <sys/percpu.h>
#include <sys/xcall.h>
#include <dev/tprof/tprof.h>
@@ -79,6 +79,12 @@ __KERNEL_RCSID(0, "$NetBSD: tprof_x86_in
#include <machine/i82489reg.h>
#include <machine/i82489var.h>
+#define NCTRS 4 /* XXX */
+static u_int counter_bitwidth;
+
+#define PERFEVTSEL(i) (MSR_EVNTSEL0 + (i))
+#define PERFCTR(i) (MSR_PERFCTR0 + (i))
+
#define PERFEVTSEL_EVENT_SELECT __BITS(0, 7)
#define PERFEVTSEL_UNIT_MASK __BITS(8, 15)
#define PERFEVTSEL_USR __BIT(16)
@@ -90,72 +96,115 @@ __KERNEL_RCSID(0, "$NetBSD: tprof_x86_in
#define PERFEVTSEL_INV __BIT(23)
#define PERFEVTSEL_COUNTER_MASK __BITS(24, 31)
-static uint64_t counter_bitwidth;
-static uint64_t counter_val = 5000000;
-static uint64_t counter_reset_val;
-
static uint32_t intel_lapic_saved[MAXCPUS];
static nmi_handler_t *intel_nmi_handle;
-static tprof_param_t intel_param;
+
+static uint32_t
+tprof_intel_ncounters(void)
+{
+ return NCTRS;
+}
+
+static u_int
+tprof_intel_counter_bitwidth(u_int counter)
+{
+ return counter_bitwidth;
+}
+
+static inline void
+tprof_intel_counter_write(u_int counter, uint64_t val)
+{
+ wrmsr(PERFCTR(counter), val);
+}
+
+static inline uint64_t
+tprof_intel_counter_read(u_int counter)
+{
+ return rdmsr(PERFCTR(counter));
+}
static void
-tprof_intel_start_cpu(void *arg1, void *arg2)
+tprof_intel_configure_event(u_int counter, const tprof_param_t *param)
{
- struct cpu_info * const ci = curcpu();
uint64_t evtval;
evtval =
- __SHIFTIN(intel_param.p_event, PERFEVTSEL_EVENT_SELECT) |
- __SHIFTIN(intel_param.p_unit, PERFEVTSEL_UNIT_MASK) |
- ((intel_param.p_flags & TPROF_PARAM_USER) ? PERFEVTSEL_USR : 0) |
- ((intel_param.p_flags & TPROF_PARAM_KERN) ? PERFEVTSEL_OS : 0) |
- PERFEVTSEL_INT |
- PERFEVTSEL_EN;
+ __SHIFTIN(param->p_event, PERFEVTSEL_EVENT_SELECT) |
+ __SHIFTIN(param->p_unit, PERFEVTSEL_UNIT_MASK) |
+ ((param->p_flags & TPROF_PARAM_USER) ? PERFEVTSEL_USR : 0) |
+ ((param->p_flags & TPROF_PARAM_KERN) ? PERFEVTSEL_OS : 0) |
+ PERFEVTSEL_INT;
+ wrmsr(PERFEVTSEL(counter), evtval);
- wrmsr(MSR_PERFCTR0, counter_reset_val);
- wrmsr(MSR_EVNTSEL0, evtval);
-
- intel_lapic_saved[cpu_index(ci)] = lapic_readreg(LAPIC_LVT_PCINT);
- lapic_writereg(LAPIC_LVT_PCINT, LAPIC_DLMODE_NMI);
+ /* reset the counter */
+ tprof_intel_counter_write(counter, param->p_value);
}
static void
-tprof_intel_stop_cpu(void *arg1, void *arg2)
+tprof_intel_start(tprof_countermask_t runmask)
{
- struct cpu_info * const ci = curcpu();
+ int bit;
- wrmsr(MSR_EVNTSEL0, 0);
- wrmsr(MSR_PERFCTR0, 0);
+ while ((bit = ffs(runmask)) != 0) {
+ bit--;
+ CLR(runmask, __BIT(bit));
+ wrmsr(PERFEVTSEL(bit), rdmsr(PERFEVTSEL(bit)) | PERFEVTSEL_EN);
+ }
+}
- lapic_writereg(LAPIC_LVT_PCINT, intel_lapic_saved[cpu_index(ci)]);
+static void
+tprof_intel_stop(tprof_countermask_t stopmask)
+{
+ int bit;
+
+ while ((bit = ffs(stopmask)) != 0) {
+ bit--;
+ CLR(stopmask, __BIT(bit));
+ wrmsr(PERFEVTSEL(bit), rdmsr(PERFEVTSEL(bit)) & ~PERFEVTSEL_EN);
+ }
}
static int
-tprof_intel_nmi(const struct trapframe *tf, void *dummy)
+tprof_intel_nmi(const struct trapframe *tf, void *arg)
{
- uint32_t pcint;
- uint64_t ctr;
+ tprof_backend_softc_t *sc = arg;
tprof_frame_info_t tfi;
+ uint32_t pcint;
+ int bit;
- KASSERT(dummy == NULL);
-
- ctr = rdmsr(MSR_PERFCTR0);
- /* If the highest bit is non zero, then it's not for us. */
- if ((ctr & __BIT(counter_bitwidth-1)) != 0) {
- return 0;
- }
+ uint64_t *counters_offset =
+ percpu_getptr_remote(sc->sc_ctr_offset_percpu, curcpu());
+ tprof_countermask_t mask = sc->sc_ctr_ovf_mask;
+ while ((bit = ffs(mask)) != 0) {
+ bit--;
+ CLR(mask, __BIT(bit));
+
+ /* If the highest bit is non zero, then it's not for us. */
+ uint64_t ctr = tprof_intel_counter_read(bit);
+ if ((ctr & __BIT(counter_bitwidth - 1)) != 0)
+ continue; /* not overflowed */
+
+ if (ISSET(sc->sc_ctr_prof_mask, __BIT(bit))) {
+ /* account for the counter, and reset */
+ tprof_intel_counter_write(bit,
+ sc->sc_count[bit].ctr_counter_reset_val);
+ counters_offset[bit] +=
+ sc->sc_count[bit].ctr_counter_val + ctr;
- /* record a sample */
+ /* record a sample */
#if defined(__x86_64__)
- tfi.tfi_pc = tf->tf_rip;
+ tfi.tfi_pc = tf->tf_rip;
#else
- tfi.tfi_pc = tf->tf_eip;
+ tfi.tfi_pc = tf->tf_eip;
#endif
- tfi.tfi_inkernel = tfi.tfi_pc >= VM_MIN_KERNEL_ADDRESS;
- tprof_sample(NULL, &tfi);
-
- /* reset counter */
- wrmsr(MSR_PERFCTR0, counter_reset_val);
+ tfi.tfi_counter = bit;
+ tfi.tfi_inkernel = tfi.tfi_pc >= VM_MIN_KERNEL_ADDRESS;
+ tprof_sample(NULL, &tfi);
+ } else {
+ /* not profiled, but the overflow still must be accounted for */
+ counters_offset[bit] += __BIT(counter_bitwidth);
+ }
+ }
/* unmask PMI */
pcint = lapic_readreg(LAPIC_LVT_PCINT);
@@ -166,16 +215,9 @@ tprof_intel_nmi(const struct trapframe *
}
static uint64_t
-tprof_intel_estimate_freq(void)
+tprof_intel_counter_estimate_freq(u_int counter)
{
- uint64_t cpufreq = curcpu()->ci_data.cpu_cc_freq;
- uint64_t freq = 10000;
-
- counter_val = cpufreq / freq;
- if (counter_val == 0) {
- counter_val = UINT64_C(4000000000) / freq;
- }
- return freq;
+ return curcpu()->ci_data.cpu_cc_freq;
}
static uint32_t
@@ -203,8 +245,25 @@ tprof_intel_ident(void)
return TPROF_IDENT_INTEL_GENERIC;
}
+static void
+tprof_intel_establish_cpu(void *arg1, void *arg2)
+{
+ struct cpu_info * const ci = curcpu();
+
+ intel_lapic_saved[cpu_index(ci)] = lapic_readreg(LAPIC_LVT_PCINT);
+ lapic_writereg(LAPIC_LVT_PCINT, LAPIC_DLMODE_NMI);
+}
+
+static void
+tprof_intel_disestablish_cpu(void *arg1, void *arg2)
+{
+ struct cpu_info * const ci = curcpu();
+
+ lapic_writereg(LAPIC_LVT_PCINT, intel_lapic_saved[cpu_index(ci)]);
+}
+
static int
-tprof_intel_start(const tprof_param_t *param)
+tprof_intel_establish(tprof_backend_softc_t *sc)
{
uint64_t xc;
@@ -213,23 +272,20 @@ tprof_intel_start(const tprof_param_t *p
}
KASSERT(intel_nmi_handle == NULL);
- intel_nmi_handle = nmi_establish(tprof_intel_nmi, NULL);
-
- counter_reset_val = - counter_val + 1;
- memcpy(&intel_param, param, sizeof(*param));
+ intel_nmi_handle = nmi_establish(tprof_intel_nmi, sc);
- xc = xc_broadcast(0, tprof_intel_start_cpu, NULL, NULL);
+ xc = xc_broadcast(0, tprof_intel_establish_cpu, sc, NULL);
xc_wait(xc);
return 0;
}
static void
-tprof_intel_stop(const tprof_param_t *param)
+tprof_intel_disestablish(tprof_backend_softc_t *sc)
{
uint64_t xc;
- xc = xc_broadcast(0, tprof_intel_stop_cpu, NULL, NULL);
+ xc = xc_broadcast(0, tprof_intel_disestablish_cpu, sc, NULL);
xc_wait(xc);
KASSERT(intel_nmi_handle != NULL);
@@ -238,8 +294,15 @@ tprof_intel_stop(const tprof_param_t *pa
}
const tprof_backend_ops_t tprof_intel_ops = {
- .tbo_estimate_freq = tprof_intel_estimate_freq,
.tbo_ident = tprof_intel_ident,
+ .tbo_ncounters = tprof_intel_ncounters,
+ .tbo_counter_bitwidth = tprof_intel_counter_bitwidth,
+ .tbo_counter_read = tprof_intel_counter_read,
+ .tbo_counter_estimate_freq = tprof_intel_counter_estimate_freq,
+ .tbo_valid_event = NULL,
+ .tbo_configure_event = tprof_intel_configure_event,
.tbo_start = tprof_intel_start,
.tbo_stop = tprof_intel_stop,
+ .tbo_establish = tprof_intel_establish,
+ .tbo_disestablish = tprof_intel_disestablish,
};
Index: src/sys/dev/tprof/tprof_types.h
diff -u src/sys/dev/tprof/tprof_types.h:1.5 src/sys/dev/tprof/tprof_types.h:1.6
--- src/sys/dev/tprof/tprof_types.h:1.5 Sun Jul 15 23:46:25 2018
+++ src/sys/dev/tprof/tprof_types.h Thu Dec 1 00:32:52 2022
@@ -1,4 +1,4 @@
-/* $NetBSD: tprof_types.h,v 1.5 2018/07/15 23:46:25 jmcneill Exp $ */
+/* $NetBSD: tprof_types.h,v 1.6 2022/12/01 00:32:52 ryo Exp $ */
/*-
* Copyright (c)2010,2011 YAMAMOTO Takashi,
@@ -39,26 +39,55 @@
#include <stdint.h>
#endif
+#define TPROF_MAXCOUNTERS 32
+typedef uint32_t tprof_countermask_t;
+#define TPROF_COUNTERMASK_ALL __BITS(31, 0)
+
typedef struct {
uint32_t s_pid; /* process id */
uint32_t s_lwpid; /* lwp id */
uint32_t s_cpuid; /* cpu id */
- uint32_t s_flags; /* flags */
+ uint32_t s_flags; /* flags and counterID */
+#define TPROF_SAMPLE_INKERNEL 0x00000001 /* s_pc is in kernel address space */
+#define TPROF_SAMPLE_COUNTER_MASK 0xff000000 /* 0..(TPROF_MAXCOUNTERS-1) */
uintptr_t s_pc; /* program counter */
} tprof_sample_t;
typedef struct tprof_param {
+ u_int p_counter; /* 0..(TPROF_MAXCOUNTERS-1) */
+ u_int p__unused;
uint64_t p_event; /* event class */
uint64_t p_unit; /* unit within the event class */
uint64_t p_flags;
+#define TPROF_PARAM_KERN 0x1
+#define TPROF_PARAM_USER 0x2
+#define TPROF_PARAM_PROFILE 0x4
+#define TPROF_PARAM_VALUE2_MASK __BITS(63, 60)
+#define TPROF_PARAM_VALUE2_SCALE __SHIFTIN(1, TPROF_PARAM_VALUE2_MASK)
+#define TPROF_PARAM_VALUE2_TRIGGERCOUNT __SHIFTIN(2, TPROF_PARAM_VALUE2_MASK)
+ uint64_t p_value; /* initial value */
+ uint64_t p_value2;
+ /*
+ * p_value2 is an optional value. (p_flags & TPROF_PARAM_VALUE2_MASK)
+ * determines the usage.
+ *
+ * TPROF_PARAM_VALUE2_SCALE:
+ * Specify the counter speed as the reciprocal of the cycle counter
+ * speed ratio. if the counter is N times slower than the cycle
+ * counter, p_value2 is (0x1_0000_0000 / N). 0 is treated as 1.0.
+ * TPROF_PARAM_VALUE2_TRIGGERCOUNT:
+ * When the event counter counts up to p_value2, an interrupt for
+ * profiling is generated. 0 is treated as 1.
+ */
} tprof_param_t;
-/* s_flags */
-#define TPROF_SAMPLE_INKERNEL 1 /* s_pc is in kernel address space */
-
-/* p_flags */
-#define TPROF_PARAM_KERN 0x01
-#define TPROF_PARAM_USER 0x02
+typedef struct tprof_counts {
+ uint32_t c_cpu; /* W */
+ uint32_t c_ncounters; /* R */
+ tprof_countermask_t c_runningmask; /* R */
+ uint32_t c__unused;
+ uint64_t c_count[TPROF_MAXCOUNTERS]; /* R */
+} tprof_counts_t;
/* ti_ident */
#define TPROF_IDENT_NONE 0x00
Index: src/sys/dev/tprof/tprof_x86_amd.c
diff -u src/sys/dev/tprof/tprof_x86_amd.c:1.5 src/sys/dev/tprof/tprof_x86_amd.c:1.6
--- src/sys/dev/tprof/tprof_x86_amd.c:1.5 Fri Oct 11 18:04:52 2019
+++ src/sys/dev/tprof/tprof_x86_amd.c Thu Dec 1 00:32:52 2022
@@ -1,4 +1,4 @@
-/* $NetBSD: tprof_x86_amd.c,v 1.5 2019/10/11 18:04:52 jmcneill Exp $ */
+/* $NetBSD: tprof_x86_amd.c,v 1.6 2022/12/01 00:32:52 ryo Exp $ */
/*
* Copyright (c) 2018 The NetBSD Foundation, Inc.
@@ -56,7 +56,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: tprof_x86_amd.c,v 1.5 2019/10/11 18:04:52 jmcneill Exp $");
+__KERNEL_RCSID(0, "$NetBSD: tprof_x86_amd.c,v 1.6 2022/12/01 00:32:52 ryo Exp $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -64,6 +64,7 @@ __KERNEL_RCSID(0, "$NetBSD: tprof_x86_am
#include <sys/module.h>
#include <sys/cpu.h>
+#include <sys/percpu.h>
#include <sys/xcall.h>
#include <dev/tprof/tprof.h>
@@ -78,7 +79,8 @@ __KERNEL_RCSID(0, "$NetBSD: tprof_x86_am
#include <machine/i82489reg.h>
#include <machine/i82489var.h>
-#define NCTRS 4
+#define NCTRS 4
+#define COUNTER_BITWIDTH 48
#define PERFEVTSEL(i) (0xc0010000 + (i))
#define PERFCTR(i) (0xc0010004 + (i))
@@ -106,92 +108,128 @@ __KERNEL_RCSID(0, "$NetBSD: tprof_x86_am
* http://developer.amd.com/wordpress/media/2012/10/Basic_Performance_Measurements.pdf
*/
-static int ctrno = 0;
-static uint64_t counter_val = 5000000;
-static uint64_t counter_reset_val;
static uint32_t amd_lapic_saved[MAXCPUS];
static nmi_handler_t *amd_nmi_handle;
-static tprof_param_t amd_param;
+
+static uint32_t
+tprof_amd_ncounters(void)
+{
+ return NCTRS;
+}
+
+static u_int
+tprof_amd_counter_bitwidth(u_int counter)
+{
+ return COUNTER_BITWIDTH;
+}
+
+static inline void
+tprof_amd_counter_write(u_int counter, uint64_t val)
+{
+ wrmsr(PERFCTR(counter), val);
+}
+
+static inline uint64_t
+tprof_amd_counter_read(u_int counter)
+{
+ return rdmsr(PERFCTR(counter));
+}
static void
-tprof_amd_start_cpu(void *arg1, void *arg2)
+tprof_amd_configure_event(u_int counter, const tprof_param_t *param)
{
- struct cpu_info * const ci = curcpu();
uint64_t pesr;
uint64_t event_lo;
uint64_t event_hi;
- event_hi = amd_param.p_event >> 8;
- event_lo = amd_param.p_event & 0xff;
+ event_hi = param->p_event >> 8;
+ event_lo = param->p_event & 0xff;
pesr =
- ((amd_param.p_flags & TPROF_PARAM_USER) ? PESR_USR : 0) |
- ((amd_param.p_flags & TPROF_PARAM_KERN) ? PESR_OS : 0) |
+ ((param->p_flags & TPROF_PARAM_USER) ? PESR_USR : 0) |
+ ((param->p_flags & TPROF_PARAM_KERN) ? PESR_OS : 0) |
PESR_INT |
__SHIFTIN(event_lo, PESR_EVENT_MASK_LO) |
__SHIFTIN(event_hi, PESR_EVENT_MASK_HI) |
__SHIFTIN(0, PESR_COUNTER_MASK) |
- __SHIFTIN(amd_param.p_unit, PESR_UNIT_MASK);
+ __SHIFTIN(param->p_unit, PESR_UNIT_MASK);
+ wrmsr(PERFEVTSEL(counter), pesr);
- wrmsr(PERFCTR(ctrno), counter_reset_val);
- wrmsr(PERFEVTSEL(ctrno), pesr);
+ /* reset the counter */
+ tprof_amd_counter_write(counter, param->p_value);
+}
- amd_lapic_saved[cpu_index(ci)] = lapic_readreg(LAPIC_LVT_PCINT);
- lapic_writereg(LAPIC_LVT_PCINT, LAPIC_DLMODE_NMI);
+static void
+tprof_amd_start(tprof_countermask_t runmask)
+{
+ int bit;
- wrmsr(PERFEVTSEL(ctrno), pesr | PESR_EN);
+ while ((bit = ffs(runmask)) != 0) {
+ bit--;
+ CLR(runmask, __BIT(bit));
+ wrmsr(PERFEVTSEL(bit), rdmsr(PERFEVTSEL(bit)) | PESR_EN);
+ }
}
static void
-tprof_amd_stop_cpu(void *arg1, void *arg2)
+tprof_amd_stop(tprof_countermask_t stopmask)
{
- struct cpu_info * const ci = curcpu();
+ int bit;
- wrmsr(PERFEVTSEL(ctrno), 0);
-
- lapic_writereg(LAPIC_LVT_PCINT, amd_lapic_saved[cpu_index(ci)]);
+ while ((bit = ffs(stopmask)) != 0) {
+ bit--;
+ CLR(stopmask, __BIT(bit));
+ wrmsr(PERFEVTSEL(bit), rdmsr(PERFEVTSEL(bit)) & ~PESR_EN);
+ }
}
static int
-tprof_amd_nmi(const struct trapframe *tf, void *dummy)
+tprof_amd_nmi(const struct trapframe *tf, void *arg)
{
+ tprof_backend_softc_t *sc = arg;
tprof_frame_info_t tfi;
- uint64_t ctr;
-
- KASSERT(dummy == NULL);
+ int bit;
- /* check if it's for us */
- ctr = rdmsr(PERFCTR(ctrno));
- if ((ctr & (UINT64_C(1) << 63)) != 0) { /* check if overflowed */
- /* not ours */
- return 0;
- }
+ uint64_t *counters_offset =
+ percpu_getptr_remote(sc->sc_ctr_offset_percpu, curcpu());
+ tprof_countermask_t mask = sc->sc_ctr_ovf_mask;
+ while ((bit = ffs(mask)) != 0) {
+ bit--;
+ CLR(mask, __BIT(bit));
+
+ /* If the highest bit is non zero, then it's not for us. */
+ uint64_t ctr = tprof_amd_counter_read(bit);
+ if ((ctr & __BIT(COUNTER_BITWIDTH - 1)) != 0)
+ continue; /* not overflowed */
+
+ if (ISSET(sc->sc_ctr_prof_mask, __BIT(bit))) {
+ /* account for the counter, and reset */
+ tprof_amd_counter_write(bit,
+ sc->sc_count[bit].ctr_counter_reset_val);
+ counters_offset[bit] +=
+ sc->sc_count[bit].ctr_counter_val + ctr;
- /* record a sample */
+ /* record a sample */
#if defined(__x86_64__)
- tfi.tfi_pc = tf->tf_rip;
+ tfi.tfi_pc = tf->tf_rip;
#else
- tfi.tfi_pc = tf->tf_eip;
+ tfi.tfi_pc = tf->tf_eip;
#endif
- tfi.tfi_inkernel = tfi.tfi_pc >= VM_MIN_KERNEL_ADDRESS;
- tprof_sample(NULL, &tfi);
-
- /* reset counter */
- wrmsr(PERFCTR(ctrno), counter_reset_val);
+ tfi.tfi_counter = bit;
+ tfi.tfi_inkernel = tfi.tfi_pc >= VM_MIN_KERNEL_ADDRESS;
+ tprof_sample(NULL, &tfi);
+ } else {
+ /* not profiled, but the overflow still must be accounted for */
+ counters_offset[bit] += __BIT(COUNTER_BITWIDTH);
+ }
+ }
return 1;
}
static uint64_t
-tprof_amd_estimate_freq(void)
+tprof_amd_counter_estimate_freq(u_int counter)
{
- uint64_t cpufreq = curcpu()->ci_data.cpu_cc_freq;
- uint64_t freq = 10000;
-
- counter_val = cpufreq / freq;
- if (counter_val == 0) {
- counter_val = UINT64_C(4000000000) / freq;
- }
- return freq;
+ return curcpu()->ci_data.cpu_cc_freq;
}
static uint32_t
@@ -213,8 +251,25 @@ tprof_amd_ident(void)
return TPROF_IDENT_NONE;
}
+static void
+tprof_amd_establish_cpu(void *arg1, void *arg2)
+{
+ struct cpu_info * const ci = curcpu();
+
+ amd_lapic_saved[cpu_index(ci)] = lapic_readreg(LAPIC_LVT_PCINT);
+ lapic_writereg(LAPIC_LVT_PCINT, LAPIC_DLMODE_NMI);
+}
+
+static void
+tprof_amd_disestablish_cpu(void *arg1, void *arg2)
+{
+ struct cpu_info * const ci = curcpu();
+
+ lapic_writereg(LAPIC_LVT_PCINT, amd_lapic_saved[cpu_index(ci)]);
+}
+
static int
-tprof_amd_start(const tprof_param_t *param)
+tprof_amd_establish(tprof_backend_softc_t *sc)
{
uint64_t xc;
@@ -223,23 +278,20 @@ tprof_amd_start(const tprof_param_t *par
}
KASSERT(amd_nmi_handle == NULL);
- amd_nmi_handle = nmi_establish(tprof_amd_nmi, NULL);
-
- counter_reset_val = - counter_val + 1;
- memcpy(&amd_param, param, sizeof(*param));
+ amd_nmi_handle = nmi_establish(tprof_amd_nmi, sc);
- xc = xc_broadcast(0, tprof_amd_start_cpu, NULL, NULL);
+ xc = xc_broadcast(0, tprof_amd_establish_cpu, sc, NULL);
xc_wait(xc);
return 0;
}
static void
-tprof_amd_stop(const tprof_param_t *param)
+tprof_amd_disestablish(tprof_backend_softc_t *sc)
{
uint64_t xc;
- xc = xc_broadcast(0, tprof_amd_stop_cpu, NULL, NULL);
+ xc = xc_broadcast(0, tprof_amd_disestablish_cpu, sc, NULL);
xc_wait(xc);
KASSERT(amd_nmi_handle != NULL);
@@ -248,8 +300,15 @@ tprof_amd_stop(const tprof_param_t *para
}
const tprof_backend_ops_t tprof_amd_ops = {
- .tbo_estimate_freq = tprof_amd_estimate_freq,
.tbo_ident = tprof_amd_ident,
+ .tbo_ncounters = tprof_amd_ncounters,
+ .tbo_counter_bitwidth = tprof_amd_counter_bitwidth,
+ .tbo_counter_read = tprof_amd_counter_read,
+ .tbo_counter_estimate_freq = tprof_amd_counter_estimate_freq,
+ .tbo_valid_event = NULL,
+ .tbo_configure_event = tprof_amd_configure_event,
.tbo_start = tprof_amd_start,
.tbo_stop = tprof_amd_stop,
+ .tbo_establish = tprof_amd_establish,
+ .tbo_disestablish = tprof_amd_disestablish,
};
Index: src/sys/dev/tprof/tprof_x86.c
diff -u src/sys/dev/tprof/tprof_x86.c:1.1 src/sys/dev/tprof/tprof_x86.c:1.2
--- src/sys/dev/tprof/tprof_x86.c:1.1 Tue Jul 24 09:47:35 2018
+++ src/sys/dev/tprof/tprof_x86.c Thu Dec 1 00:32:52 2022
@@ -1,4 +1,4 @@
-/* $NetBSD: tprof_x86.c,v 1.1 2018/07/24 09:47:35 maxv Exp $ */
+/* $NetBSD: tprof_x86.c,v 1.2 2022/12/01 00:32:52 ryo Exp $ */
/*
* Copyright (c) 2018 The NetBSD Foundation, Inc.
@@ -30,7 +30,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: tprof_x86.c,v 1.1 2018/07/24 09:47:35 maxv Exp $");
+__KERNEL_RCSID(0, "$NetBSD: tprof_x86.c,v 1.2 2022/12/01 00:32:52 ryo Exp $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -54,16 +54,28 @@ extern const tprof_backend_ops_t tprof_i
static int
tprof_x86_init(void)
{
+ const tprof_backend_ops_t *ops;
+ const char *name;
+ int ncounters;
+
switch (cpu_vendor) {
case CPUVENDOR_AMD:
- return tprof_backend_register("tprof_amd", &tprof_amd_ops,
- TPROF_BACKEND_VERSION);
+ name = "tprof_amd";
+ ops = &tprof_amd_ops;
+ break;
case CPUVENDOR_INTEL:
- return tprof_backend_register("tprof_intel", &tprof_intel_ops,
- TPROF_BACKEND_VERSION);
+ name = "tprof_intel";
+ ops = &tprof_intel_ops;
+ break;
default:
return ENOTSUP;
}
+
+ ncounters = ops->tbo_ncounters();
+ if (ncounters == 0)
+ return ENOTSUP;
+
+ return tprof_backend_register(name, ops, TPROF_BACKEND_VERSION);
}
static int
Index: src/usr.sbin/tprof/tprof.8
diff -u src/usr.sbin/tprof/tprof.8:1.16 src/usr.sbin/tprof/tprof.8:1.17
--- src/usr.sbin/tprof/tprof.8:1.16 Wed May 25 06:17:19 2022
+++ src/usr.sbin/tprof/tprof.8 Thu Dec 1 00:32:52 2022
@@ -1,4 +1,4 @@
-.\" $NetBSD: tprof.8,v 1.16 2022/05/25 06:17:19 msaitoh Exp $
+.\" $NetBSD: tprof.8,v 1.17 2022/12/01 00:32:52 ryo Exp $
.\"
.\" Copyright (c)2011 YAMAMOTO Takashi,
.\" All rights reserved.
@@ -24,7 +24,7 @@
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
-.Dd October 11, 2019
+.Dd December 1, 2022
.Dt TPROF 8
.Os
.Sh NAME
@@ -67,6 +67,7 @@ Display a list of performance counter ev
.It monitor Xo
.Fl e
.Ar name:option
+.Op Fl e Ar ...
.Op Fl o Ar outfile
.Ar command
.Xc
Index: src/usr.sbin/tprof/tprof.c
diff -u src/usr.sbin/tprof/tprof.c:1.13 src/usr.sbin/tprof/tprof.c:1.14
--- src/usr.sbin/tprof/tprof.c:1.13 Tue Jul 24 09:50:37 2018
+++ src/usr.sbin/tprof/tprof.c Thu Dec 1 00:32:52 2022
@@ -1,4 +1,4 @@
-/* $NetBSD: tprof.c,v 1.13 2018/07/24 09:50:37 maxv Exp $ */
+/* $NetBSD: tprof.c,v 1.14 2022/12/01 00:32:52 ryo Exp $ */
/*
* Copyright (c) 2018 The NetBSD Foundation, Inc.
@@ -57,7 +57,7 @@
#include <sys/cdefs.h>
#ifndef lint
-__RCSID("$NetBSD: tprof.c,v 1.13 2018/07/24 09:50:37 maxv Exp $");
+__RCSID("$NetBSD: tprof.c,v 1.14 2022/12/01 00:32:52 ryo Exp $");
#endif /* not lint */
#include <sys/ioctl.h>
@@ -80,8 +80,11 @@ __RCSID("$NetBSD: tprof.c,v 1.13 2018/07
#define _PATH_TPROF "/dev/tprof"
+struct tprof_info tprof_info;
+u_int ncounters;
int devfd;
int outfd;
+u_int nevent;
static void tprof_list(int, char **);
static void tprof_monitor(int, char **) __dead;
@@ -106,7 +109,7 @@ usage(void)
fprintf(stderr, "\n");
fprintf(stderr, "\tlist\n");
fprintf(stderr, "\t\tList the available events.\n");
- fprintf(stderr, "\tmonitor -e name:option [-o outfile] command\n");
+ fprintf(stderr, "\tmonitor -e name:option [-e ...] [-o outfile] command\n");
fprintf(stderr, "\t\tMonitor the event 'name' with option 'option'\n"
"\t\tcounted during the execution of 'command'.\n");
fprintf(stderr, "\tanalyze [-CkLPs] [-p pid] file\n");
@@ -156,14 +159,15 @@ static void
tprof_monitor(int argc, char **argv)
{
const char *outfile = "tprof.out";
- struct tprof_param param;
struct tprof_stat ts;
+ tprof_param_t params[TPROF_MAXCOUNTERS];
pid_t pid;
pthread_t pt;
- int ret, ch;
+ int ret, ch, i;
char *tokens[2];
+ tprof_countermask_t mask = TPROF_COUNTERMASK_ALL;
- memset(¶m, 0, sizeof(param));
+ memset(params, 0, sizeof(params));
while ((ch = getopt(argc, argv, "o:e:")) != -1) {
switch (ch) {
@@ -175,11 +179,17 @@ tprof_monitor(int argc, char **argv)
tokens[1] = strtok(NULL, ":");
if (tokens[1] == NULL)
usage();
- tprof_event_lookup(tokens[0], &param);
+ tprof_event_lookup(tokens[0], &params[nevent]);
if (strchr(tokens[1], 'u'))
- param.p_flags |= TPROF_PARAM_USER;
+ params[nevent].p_flags |= TPROF_PARAM_USER;
if (strchr(tokens[1], 'k'))
- param.p_flags |= TPROF_PARAM_KERN;
+ params[nevent].p_flags |= TPROF_PARAM_KERN;
+ if (params[nevent].p_flags == 0)
+ usage();
+ nevent++;
+ if (nevent > __arraycount(params) ||
+ nevent > ncounters)
+ errx(EXIT_FAILURE, "Too many events");
break;
default:
usage();
@@ -187,11 +197,7 @@ tprof_monitor(int argc, char **argv)
}
argc -= optind;
argv += optind;
- if (argc == 0) {
- usage();
- }
-
- if (param.p_flags == 0) {
+ if (argc == 0 || nevent == 0) {
usage();
}
@@ -200,7 +206,15 @@ tprof_monitor(int argc, char **argv)
err(EXIT_FAILURE, "%s", outfile);
}
- ret = ioctl(devfd, TPROF_IOC_START, &param);
+ for (i = 0; i < (int)nevent; i++) {
+ params[i].p_counter = i;
+ params[i].p_flags |= TPROF_PARAM_PROFILE;
+ ret = ioctl(devfd, TPROF_IOC_CONFIGURE_EVENT, &params[i]);
+ if (ret == -1)
+ err(EXIT_FAILURE, "TPROF_IOC_CONFIGURE_EVENT");
+ }
+
+ ret = ioctl(devfd, TPROF_IOC_START, &mask);
if (ret == -1) {
err(EXIT_FAILURE, "TPROF_IOC_START");
}
@@ -237,7 +251,7 @@ tprof_monitor(int argc, char **argv)
}
}
- ret = ioctl(devfd, TPROF_IOC_STOP, NULL);
+ ret = ioctl(devfd, TPROF_IOC_STOP, &mask);
if (ret == -1) {
err(EXIT_FAILURE, "TPROF_IOC_STOP");
}
@@ -263,7 +277,6 @@ tprof_monitor(int argc, char **argv)
int
main(int argc, char *argv[])
{
- struct tprof_info info;
const struct cmdtab *ct;
int ret;
@@ -275,18 +288,26 @@ main(int argc, char *argv[])
err(EXIT_FAILURE, "%s", _PATH_TPROF);
}
- ret = ioctl(devfd, TPROF_IOC_GETINFO, &info);
+ ret = ioctl(devfd, TPROF_IOC_GETINFO, &tprof_info);
if (ret == -1) {
err(EXIT_FAILURE, "TPROF_IOC_GETINFO");
}
- if (info.ti_version != TPROF_VERSION) {
+ if (tprof_info.ti_version != TPROF_VERSION) {
errx(EXIT_FAILURE, "version mismatch: version=%d, expected=%d",
- info.ti_version, TPROF_VERSION);
+ tprof_info.ti_version, TPROF_VERSION);
}
- if (tprof_event_init(info.ti_ident) == -1) {
+ if (tprof_event_init(tprof_info.ti_ident) == -1) {
errx(EXIT_FAILURE, "cpu not supported");
}
+ ret = ioctl(devfd, TPROF_IOC_GETNCOUNTERS, &ncounters);
+ if (ret == -1) {
+ err(EXIT_FAILURE, "TPROF_IOC_GETNCOUNTERS");
+ }
+ if (ncounters == 0) {
+ errx(EXIT_FAILURE, "no available counters");
+ }
+
if (argc == 0)
usage();
Index: src/usr.sbin/tprof/tprof_analyze.c
diff -u src/usr.sbin/tprof/tprof_analyze.c:1.5 src/usr.sbin/tprof/tprof_analyze.c:1.6
--- src/usr.sbin/tprof/tprof_analyze.c:1.5 Thu Oct 14 09:52:40 2021
+++ src/usr.sbin/tprof/tprof_analyze.c Thu Dec 1 00:32:52 2022
@@ -1,4 +1,4 @@
-/* $NetBSD: tprof_analyze.c,v 1.5 2021/10/14 09:52:40 skrll Exp $ */
+/* $NetBSD: tprof_analyze.c,v 1.6 2022/12/01 00:32:52 ryo Exp $ */
/*
* Copyright (c) 2010,2011,2012 YAMAMOTO Takashi,
@@ -28,7 +28,7 @@
#include <sys/cdefs.h>
#ifndef lint
-__RCSID("$NetBSD: tprof_analyze.c,v 1.5 2021/10/14 09:52:40 skrll Exp $");
+__RCSID("$NetBSD: tprof_analyze.c,v 1.6 2022/12/01 00:32:52 ryo Exp $");
#endif /* not lint */
#include <assert.h>
@@ -63,6 +63,7 @@ struct addr {
uint32_t cpuid; /* cpu id */
bool in_kernel; /* if addr is in the kernel address space */
unsigned int nsamples; /* number of samples taken for the address */
+ unsigned int ncount[TPROF_MAXCOUNTERS]; /* count per event */
};
static rb_tree_t addrtree;
@@ -278,6 +279,7 @@ tprof_analyze(int argc, char **argv)
size_t naddrs, nsamples, i;
float perc;
int ch;
+ u_int c, maxevent = 0;
bool distinguish_processes = true;
bool distinguish_cpus = true;
bool distinguish_lwps = true;
@@ -363,6 +365,7 @@ tprof_analyze(int argc, char **argv)
continue;
}
a = emalloc(sizeof(*a));
+ memset(a, 0, sizeof(*a));
a->addr = (uint64_t)sample.s_pc;
if (distinguish_processes) {
a->pid = sample.s_pid;
@@ -389,7 +392,13 @@ tprof_analyze(int argc, char **argv)
a->addr -= offset;
}
}
+ c = __SHIFTOUT(sample.s_flags, TPROF_SAMPLE_COUNTER_MASK);
+ assert(c < TPROF_MAXCOUNTERS);
+ if (maxevent < c)
+ maxevent = c;
+
a->nsamples = 1;
+ a->ncount[c] = 1;
o = rb_tree_insert_node(&addrtree, a);
if (o != a) {
assert(a->addr == o->addr);
@@ -398,7 +407,9 @@ tprof_analyze(int argc, char **argv)
assert(a->cpuid == o->cpuid);
assert(a->in_kernel == o->in_kernel);
free(a);
+
o->nsamples++;
+ o->ncount[c]++;
} else {
naddrs++;
}
@@ -423,8 +434,17 @@ tprof_analyze(int argc, char **argv)
*/
printf("File: %s\n", argv[0]);
printf("Number of samples: %zu\n\n", nsamples);
- printf("percentage nsamples pid lwp cpu k address symbol\n");
- printf("------------ -------- ------ ------ ---- - ---------------- ------\n");
+
+ printf("percentage nsamples ");
+ for (c = 0; c <= maxevent; c++)
+ printf("event#%02u ", c);
+ printf("pid lwp cpu k address symbol\n");
+
+ printf("------------ -------- ");
+ for (c = 0; c <= maxevent; c++)
+ printf("-------- ");
+
+ printf("------ ------ ---- - ---------------- ------\n");
for (i = 0; i < naddrs; i++) {
const char *name;
char buf[100];
@@ -448,11 +468,17 @@ tprof_analyze(int argc, char **argv)
perc = ((float)a->nsamples / (float)nsamples) * 100.0;
- printf("%11f%% %8u %6" PRIu32 " %6" PRIu32 " %4" PRIu32 " %u %016"
- PRIx64 " %s\n",
- perc,
- a->nsamples, a->pid, a->lwpid, a->cpuid, a->in_kernel,
- a->addr, name);
+ printf("%11f%% %8u", perc, a->nsamples);
+
+ for (c = 0; c <= maxevent; c++)
+ printf(" %8u", a->ncount[c]);
+
+ printf(" %6" PRIu32 " %6" PRIu32 " %4" PRIu32 " %u %016"
+ PRIx64" %s",
+ a->pid, a->lwpid, a->cpuid, a->in_kernel, a->addr, name);
+
+
+ printf("\n");
}
fclose(f);