x86 and arm64 can both support direct access to event counters from userspace. The access sequence is non-trivial and currently exists in perf test code (tools/perf/arch/x86/tests/rdpmc.c), with copies in projects such as PAPI and libpfm4.
Patches to add arm64 userspace support are pending[1].

For this RFC, looking for a yes, seems like a good idea, or no, go away we
don't want this in libperf.

TODO:
- Handle userspace access not enabled.
- Handle pmc_width and cap_user_time_short in read loop.
- Move existing rdpmc test to libperf based test?
- Abstract out rdtsc/rdpmc (currently only builds on x86 and need to add
  Arm versions)

[1] https://lore.kernel.org/r/20200707205333.624938-1-r...@kernel.org/

Cc: Peter Zijlstra <pet...@infradead.org>
Cc: Ingo Molnar <mi...@redhat.com>
Cc: Arnaldo Carvalho de Melo <a...@kernel.org>
Cc: Mark Rutland <mark.rutl...@arm.com>
Cc: Alexander Shishkin <alexander.shish...@linux.intel.com>
Cc: Jiri Olsa <jo...@redhat.com>
Cc: Namhyung Kim <namhy...@kernel.org>
Signed-off-by: Rob Herring <r...@kernel.org>
---
 .../perf/Documentation/examples/user_read.c   | 112 ++++++++++++++++++
 tools/lib/perf/include/perf/mmap.h            |   2 +
 tools/lib/perf/libperf.map                    |   1 +
 tools/lib/perf/mmap.c                         |  85 +++++++++++++
 4 files changed, 200 insertions(+)
 create mode 100644 tools/lib/perf/Documentation/examples/user_read.c

diff --git a/tools/lib/perf/Documentation/examples/user_read.c b/tools/lib/perf/Documentation/examples/user_read.c
new file mode 100644
index 000000000000..47d5f1935861
--- /dev/null
+++ b/tools/lib/perf/Documentation/examples/user_read.c
@@ -0,0 +1,112 @@
+#include <linux/perf_event.h>
+#include <perf/evlist.h>
+#include <perf/evsel.h>
+#include <perf/cpumap.h>
+#include <perf/threadmap.h>
+#include <perf/mmap.h>
+#include <perf/core.h>
+#include <perf/event.h>
+#include <stdio.h>
+#include <unistd.h>
+
+static int libperf_print(enum libperf_print_level level,
+			 const char *fmt, va_list ap)
+{
+	return vfprintf(stderr, fmt, ap);
+}
+
+/*
+ * Read the counter through the first mmap of @evlist, retrying for as long
+ * as perf_mmap__read_self() returns ~0ULL. Returns 0 if there is no mmap.
+ */
+static unsigned long long read_counter(struct perf_evlist *evlist)
+{
+	struct perf_mmap *map;
+	unsigned long long cnt;
+
+	perf_evlist__for_each_mmap(evlist, map, false) {
+		while ((cnt = perf_mmap__read_self(map)) == ~0ULL) ;
+		return cnt;
+	}
+	return 0;
+}
+
+/*
+ * Count user-space CPU cycles across a busy loop by reading the counter
+ * before and after it, then print the start, end and delta values.
+ */
+int main(int argc, char **argv)
+{
+	int count = 100000, err = -1;
+	unsigned long long start, end;
+	struct perf_evlist *evlist;
+	struct perf_evsel *evsel;
+	struct perf_thread_map *threads;
+	struct perf_cpu_map *cpus;
+
+	struct perf_event_attr attr1 = {
+		.type = PERF_TYPE_HARDWARE,
+		.config = PERF_COUNT_HW_CPU_CYCLES,
+		.disabled = 1,
+		.exclude_kernel = 1,
+	};
+
+	libperf_init(libperf_print);
+
+	evlist = perf_evlist__new();
+	if (!evlist) {
+		fprintf(stderr, "failed to create evlist\n");
+		return err;
+	}
+	evsel = perf_evsel__new(&attr1);
+	if (!evsel) {
+		fprintf(stderr, "failed to create evsel1\n");
+		goto out_evlist;
+	}
+	perf_evlist__add(evlist, evsel);
+
+	threads = perf_thread_map__new_dummy();
+	if (!threads) {
+		fprintf(stderr, "failed to create threads\n");
+		goto out_evlist;
+	}
+	perf_thread_map__set_pid(threads, 0, 0);
+	cpus = perf_cpu_map__dummy_new();
+	if (!cpus) {
+		fprintf(stderr, "failed to create cpus\n");
+		goto out_threads;
+	}
+	perf_evlist__set_maps(evlist, cpus, threads);
+
+	err = perf_evlist__open(evlist);
+	if (err) {
+		fprintf(stderr, "failed to open evsel\n");
+		goto out_cpus;
+	}
+
+	err = perf_evlist__mmap(evlist, 0);
+	if (err) {
+		fprintf(stderr, "failed to mmap evlist\n");
+		goto out_cpus;
+	}
+
+	perf_evlist__enable(evlist);
+
+	start = read_counter(evlist);
+	while (count--);
+	end = read_counter(evlist);
+
+	perf_evlist__disable(evlist);
+
+	fprintf(stdout, "start %llu, end %llu, delta %llu\n",
+		start, end, end-start);
+
+	perf_evlist__close(evlist);
+out_cpus:
+	perf_cpu_map__put(cpus);
+out_threads:
+	perf_thread_map__put(threads);
+out_evlist:
+	perf_evlist__delete(evlist);
+	return err;
+}
diff --git a/tools/lib/perf/include/perf/mmap.h b/tools/lib/perf/include/perf/mmap.h
index 9508ad90d8b9..2d0a7e8b13db 100644
--- a/tools/lib/perf/include/perf/mmap.h
+++ b/tools/lib/perf/include/perf/mmap.h
@@ -12,4 +12,6 @@ LIBPERF_API int perf_mmap__read_init(struct perf_mmap *map);
 LIBPERF_API void perf_mmap__read_done(struct perf_mmap *map);
 LIBPERF_API union perf_event *perf_mmap__read_event(struct perf_mmap *map);
 
+LIBPERF_API __u64 perf_mmap__read_self(struct perf_mmap *map);
+
 #endif /* __LIBPERF_MMAP_H */
diff --git a/tools/lib/perf/libperf.map b/tools/lib/perf/libperf.map
index 7be1af8a546c..676a73300add 100644
--- a/tools/lib/perf/libperf.map
+++ b/tools/lib/perf/libperf.map
@@ -46,6 +46,7 @@ LIBPERF_0.0.1 {
 		perf_mmap__read_init;
 		perf_mmap__read_done;
 		perf_mmap__read_event;
+		perf_mmap__read_self;
 	local:
 		*;
 };
diff --git a/tools/lib/perf/mmap.c b/tools/lib/perf/mmap.c
index 79d5ed6c38cc..5a167618f4c5 100644
--- a/tools/lib/perf/mmap.c
+++ b/tools/lib/perf/mmap.c
@@ -273,3 +273,88 @@ union perf_event *perf_mmap__read_event(struct perf_mmap *map)
 
 	return event;
 }
+
+/* Read hardware counter @counter with the x86 RDPMC instruction. */
+static u64 rdpmc(unsigned int counter)
+{
+	unsigned int low, high;
+
+	asm volatile("rdpmc" : "=a" (low), "=d" (high) : "c" (counter));
+
+	return low | ((u64)high) << 32;
+}
+
+/* Read the x86 time-stamp counter with the RDTSC instruction. */
+static u64 rdtsc(void)
+{
+	unsigned int low, high;
+
+	asm volatile("rdtsc" : "=a" (low), "=d" (high));
+
+	return low | ((u64)high) << 32;
+}
+
+/*
+ * Read the current count of the event behind @map from user space. The
+ * fields of the mmap'ed control page are re-read until pc->lock is
+ * unchanged across the read.
+ *
+ * Returns the count, scaled by enabled/running time when the two differ,
+ * or ~0 when the map's refcount has dropped to zero.
+ */
+__u64 perf_mmap__read_self(struct perf_mmap *map)
+{
+	struct perf_event_mmap_page *pc = map->base;
+	u32 seq, idx, time_mult = 0, time_shift = 0;
+	u64 count, cyc = 0, time_offset = 0, enabled, running, delta;
+
+	/*
+	 * Check if event was unmapped due to a POLLHUP/POLLERR.
+	 */
+	if (!refcount_read(&map->refcnt))
+		return ~0;
+
+	do {
+		seq = pc->lock;
+		barrier();
+
+		enabled = pc->time_enabled;
+		running = pc->time_running;
+
+		if (enabled != running) {
+			cyc = rdtsc();
+			time_mult = pc->time_mult;
+			time_shift = pc->time_shift;
+			time_offset = pc->time_offset;
+		}
+
+		idx = pc->index;
+		count = pc->offset;
+		if (idx)
+			count += rdpmc(idx - 1);
+
+		barrier();
+	} while (pc->lock != seq);
+
+	if (enabled != running) {
+		u64 quot, rem;
+
+		quot = (cyc >> time_shift);
+		rem = cyc & (((u64)1 << time_shift) - 1);
+		delta = time_offset + quot * time_mult +
+			((rem * time_mult) >> time_shift);
+
+		enabled += delta;
+		if (idx)
+			running += delta;
+
+		/* Scale only if running != 0 to avoid dividing by zero. */
+		if (running) {
+			quot = count / running;
+			rem = count % running;
+			count = quot * enabled + (rem * enabled) / running;
+		}
+	}
+
+	return count;
+}
-- 
2.25.1