x86 and arm64 can both support direct access of event counters in
userspace. The access sequence is less than trivial and currently exists
in perf test code (tools/perf/arch/x86/tests/rdpmc.c) with copies in
projects such as PAPI and libpfm4.

In order to support usersapce access, an event must be mmapped. While
there's already mmap support for evlist, the usecase is a bit different
than the self monitoring with userspace access. So let's add a new
perf_evsel__mmap() function to mmap an evsel. This allows implementing
userspace access as a fastpath for perf_evsel__read().

The mmapped address is returned by perf_evsel__mmap() primarily for
users/tests to check if userspace access is enabled.

Signed-off-by: Rob Herring <[email protected]>
---
 tools/lib/perf/Documentation/libperf.txt |  1 +
 tools/lib/perf/evsel.c                   | 33 +++++++++
 tools/lib/perf/include/internal/evsel.h  |  2 +
 tools/lib/perf/include/internal/mmap.h   |  3 +
 tools/lib/perf/include/perf/evsel.h      |  1 +
 tools/lib/perf/libperf.map               |  1 +
 tools/lib/perf/mmap.c                    | 90 ++++++++++++++++++++++++
 tools/lib/perf/tests/test-evsel.c        | 64 +++++++++++++++++
 8 files changed, 195 insertions(+)

diff --git a/tools/lib/perf/Documentation/libperf.txt 
b/tools/lib/perf/Documentation/libperf.txt
index 0c74c30ed23a..ca7478acc97c 100644
--- a/tools/lib/perf/Documentation/libperf.txt
+++ b/tools/lib/perf/Documentation/libperf.txt
@@ -136,6 +136,7 @@ SYNOPSIS
                        struct perf_thread_map *threads);
   void perf_evsel__close(struct perf_evsel *evsel);
   void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu);
+  void *perf_evsel__mmap(struct perf_evsel *evsel);
   int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread,
                        struct perf_counts_values *count);
   int perf_evsel__enable(struct perf_evsel *evsel);
diff --git a/tools/lib/perf/evsel.c b/tools/lib/perf/evsel.c
index 4dc06289f4c7..b0c94ef4d9b6 100644
--- a/tools/lib/perf/evsel.c
+++ b/tools/lib/perf/evsel.c
@@ -11,10 +11,12 @@
 #include <stdlib.h>
 #include <internal/xyarray.h>
 #include <internal/cpumap.h>
+#include <internal/mmap.h>
 #include <internal/threadmap.h>
 #include <internal/lib.h>
 #include <linux/string.h>
 #include <sys/ioctl.h>
+#include <sys/mman.h>
 
 void perf_evsel__init(struct perf_evsel *evsel, struct perf_event_attr *attr)
 {
@@ -156,6 +158,34 @@ void perf_evsel__close_cpu(struct perf_evsel *evsel, int 
cpu)
        perf_evsel__close_fd_cpu(evsel, cpu);
 }
 
+void *perf_evsel__mmap(struct perf_evsel *evsel)
+{
+       int ret;
+       struct perf_mmap *map;
+       struct perf_mmap_param mp = {
+               .mask = -1,
+               .prot = PROT_READ | PROT_WRITE,
+       };
+
+       if (FD(evsel, 0, 0) < 0)
+               return NULL;
+
+       map = zalloc(sizeof(*map));
+       if (!map)
+               return NULL;
+
+       perf_mmap__init(map, NULL, false, NULL);
+
+       ret = perf_mmap__mmap(map, &mp, FD(evsel, 0, 0), 0);
+       if (ret) {
+               free(map);
+               return NULL;
+       }
+
+       evsel->mmap = map;
+       return map->base;
+}
+
 int perf_evsel__read_size(struct perf_evsel *evsel)
 {
        u64 read_format = evsel->attr.read_format;
@@ -191,6 +221,9 @@ int perf_evsel__read(struct perf_evsel *evsel, int cpu, int 
thread,
        if (FD(evsel, cpu, thread) < 0)
                return -EINVAL;
 
+       if (evsel->mmap && !perf_mmap__read_self(evsel->mmap, count))
+               return 0;
+
        if (readn(FD(evsel, cpu, thread), count->values, size) <= 0)
                return -errno;
 
diff --git a/tools/lib/perf/include/internal/evsel.h 
b/tools/lib/perf/include/internal/evsel.h
index 1ffd083b235e..a7985dbb68ff 100644
--- a/tools/lib/perf/include/internal/evsel.h
+++ b/tools/lib/perf/include/internal/evsel.h
@@ -9,6 +9,7 @@
 
 struct perf_cpu_map;
 struct perf_thread_map;
+struct perf_mmap;
 struct xyarray;
 
 /*
@@ -40,6 +41,7 @@ struct perf_evsel {
        struct perf_cpu_map     *cpus;
        struct perf_cpu_map     *own_cpus;
        struct perf_thread_map  *threads;
+       struct perf_mmap        *mmap;
        struct xyarray          *fd;
        struct xyarray          *sample_id;
        u64                     *id;
diff --git a/tools/lib/perf/include/internal/mmap.h 
b/tools/lib/perf/include/internal/mmap.h
index be7556e0a2b2..5e3422f40ed5 100644
--- a/tools/lib/perf/include/internal/mmap.h
+++ b/tools/lib/perf/include/internal/mmap.h
@@ -11,6 +11,7 @@
 #define PERF_SAMPLE_MAX_SIZE (1 << 16)
 
 struct perf_mmap;
+struct perf_counts_values;
 
 typedef void (*libperf_unmap_cb_t)(struct perf_mmap *map);
 
@@ -52,4 +53,6 @@ void perf_mmap__put(struct perf_mmap *map);
 
 u64 perf_mmap__read_head(struct perf_mmap *map);
 
+int perf_mmap__read_self(struct perf_mmap *map, struct perf_counts_values 
*count);
+
 #endif /* __LIBPERF_INTERNAL_MMAP_H */
diff --git a/tools/lib/perf/include/perf/evsel.h 
b/tools/lib/perf/include/perf/evsel.h
index c82ec39a4ad0..6d0da962870c 100644
--- a/tools/lib/perf/include/perf/evsel.h
+++ b/tools/lib/perf/include/perf/evsel.h
@@ -27,6 +27,7 @@ LIBPERF_API int perf_evsel__open(struct perf_evsel *evsel, 
struct perf_cpu_map *
                                 struct perf_thread_map *threads);
 LIBPERF_API void perf_evsel__close(struct perf_evsel *evsel);
 LIBPERF_API void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu);
+LIBPERF_API void *perf_evsel__mmap(struct perf_evsel *evsel);
 LIBPERF_API int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread,
                                 struct perf_counts_values *count);
 LIBPERF_API int perf_evsel__enable(struct perf_evsel *evsel);
diff --git a/tools/lib/perf/libperf.map b/tools/lib/perf/libperf.map
index 7be1af8a546c..733a0647be8b 100644
--- a/tools/lib/perf/libperf.map
+++ b/tools/lib/perf/libperf.map
@@ -23,6 +23,7 @@ LIBPERF_0.0.1 {
                perf_evsel__disable;
                perf_evsel__open;
                perf_evsel__close;
+               perf_evsel__mmap;
                perf_evsel__read;
                perf_evsel__cpus;
                perf_evsel__threads;
diff --git a/tools/lib/perf/mmap.c b/tools/lib/perf/mmap.c
index 79d5ed6c38cc..cb07969cfdbf 100644
--- a/tools/lib/perf/mmap.c
+++ b/tools/lib/perf/mmap.c
@@ -8,9 +8,11 @@
 #include <linux/perf_event.h>
 #include <perf/mmap.h>
 #include <perf/event.h>
+#include <perf/evsel.h>
 #include <internal/mmap.h>
 #include <internal/lib.h>
 #include <linux/kernel.h>
+#include <linux/math64.h>
 #include "internal.h"
 
 void perf_mmap__init(struct perf_mmap *map, struct perf_mmap *prev,
@@ -273,3 +275,91 @@ union perf_event *perf_mmap__read_event(struct perf_mmap 
*map)
 
        return event;
 }
+
+#if defined(__i386__) || defined(__x86_64__)
+static u64 read_perf_counter(unsigned int counter)
+{
+       unsigned int low, high;
+
+       asm volatile("rdpmc" : "=a" (low), "=d" (high) : "c" (counter));
+
+       return low | ((u64)high) << 32;
+}
+
+static u64 read_timestamp(void)
+{
+       unsigned int low, high;
+
+       asm volatile("rdtsc" : "=a" (low), "=d" (high));
+
+       return low | ((u64)high) << 32;
+}
+#else
+static u64 read_perf_counter(unsigned int counter) { return 0; }
+static u64 read_timestamp(void) { return 0; }
+#endif
+
+int perf_mmap__read_self(struct perf_mmap *map, struct perf_counts_values 
*count)
+{
+       struct perf_event_mmap_page *pc = map->base;
+       u32 seq, idx, time_mult = 0, time_shift = 0;
+       u64 cnt, cyc = 0, time_offset = 0, time_cycles = 0, time_mask = ~0ULL;
+
+       BUG_ON(!pc);
+
+       if (!pc->cap_user_rdpmc)
+               return -1;
+
+       do {
+               seq = READ_ONCE(pc->lock);
+               barrier();
+
+               count->ena = READ_ONCE(pc->time_enabled);
+               count->run = READ_ONCE(pc->time_running);
+
+               if (pc->cap_user_time && count->ena != count->run) {
+                       cyc = read_timestamp();
+                       time_mult = READ_ONCE(pc->time_mult);
+                       time_shift = READ_ONCE(pc->time_shift);
+                       time_offset = READ_ONCE(pc->time_offset);
+
+                       if (pc->cap_user_time_short) {
+                               time_cycles = READ_ONCE(pc->time_cycles);
+                               time_mask = READ_ONCE(pc->time_mask);
+                       }
+               }
+
+               idx = READ_ONCE(pc->index);
+               cnt = READ_ONCE(pc->offset);
+               if (pc->cap_user_rdpmc && idx) {
+                       u64 evcnt = read_perf_counter(idx - 1);
+                       u16 width = READ_ONCE(pc->pmc_width);
+
+                       evcnt <<= 64 - width;
+                       evcnt >>= 64 - width;
+                       cnt += evcnt;
+               } else
+                       return -1;
+
+               barrier();
+       } while (READ_ONCE(pc->lock) != seq);
+
+       if (count->ena != count->run) {
+               u64 delta;
+
+               /* Adjust for cap_usr_time_short, a nop if not */
+               cyc = time_cycles + ((cyc - time_cycles) & time_mask);
+
+               delta = time_offset + mul_u64_u32_shr(cyc, time_mult, 
time_shift);
+
+               count->ena += delta;
+               if (idx)
+                       count->run += delta;
+
+               cnt = mul_u64_u64_div64(cnt, count->ena, count->run);
+       }
+
+       count->val = cnt;
+
+       return 0;
+}
diff --git a/tools/lib/perf/tests/test-evsel.c 
b/tools/lib/perf/tests/test-evsel.c
index 135722ac965b..fd637d23216b 100644
--- a/tools/lib/perf/tests/test-evsel.c
+++ b/tools/lib/perf/tests/test-evsel.c
@@ -120,6 +120,68 @@ static int test_stat_thread_enable(void)
        return 0;
 }
 
+static int test_stat_user_read(int event)
+{
+       struct perf_counts_values counts = { .val = 0 };
+       struct perf_thread_map *threads;
+       struct perf_evsel *evsel;
+       struct perf_event_mmap_page *pc;
+       struct perf_event_attr attr = {
+               .type   = PERF_TYPE_HARDWARE,
+               .config = event,
+       };
+       int err, i;
+
+       threads = perf_thread_map__new_dummy();
+       __T("failed to create threads", threads);
+
+       perf_thread_map__set_pid(threads, 0, 0);
+
+       evsel = perf_evsel__new(&attr);
+       __T("failed to create evsel", evsel);
+
+       err = perf_evsel__open(evsel, NULL, threads);
+       __T("failed to open evsel", err == 0);
+
+       pc = perf_evsel__mmap(evsel);
+       __T("failed to mmap evsel", pc);
+
+#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__)
+       __T("userspace counter access not supported", pc->cap_user_rdpmc);
+       __T("userspace counter access not enabled", pc->index);
+       __T("userspace counter width not set", pc->pmc_width >= 32);
+#endif
+
+       perf_evsel__read(evsel, 0, 0, &counts);
+       __T("failed to read value for evsel", counts.val != 0);
+
+       fputs("\n", stderr);
+       for (i = 0; i < 5; i++) {
+               volatile int count = 0x10000 << i;
+               __u64 start, end, last = 0;
+
+               fprintf(stderr, "\tloop = %u, ", count);
+
+               perf_evsel__read(evsel, 0, 0, &counts);
+               start = counts.val;
+
+               while (count--) ;
+
+               perf_evsel__read(evsel, 0, 0, &counts);
+               end = counts.val;
+
+               __T("invalid counter data", (end - start) > last);
+               last = end - start;
+               fprintf(stderr, "count = %llu\n", end - start);
+       }
+
+       perf_evsel__close(evsel);
+       perf_evsel__delete(evsel);
+
+       perf_thread_map__put(threads);
+       return 0;
+}
+
 int main(int argc, char **argv)
 {
        __T_START;
@@ -129,6 +191,8 @@ int main(int argc, char **argv)
        test_stat_cpu();
        test_stat_thread();
        test_stat_thread_enable();
+       test_stat_user_read(PERF_COUNT_HW_INSTRUCTIONS);
+       test_stat_user_read(PERF_COUNT_HW_CPU_CYCLES);
 
        __T_END;
        return 0;
-- 
2.25.1

Reply via email to