Introduce perf-stat -b option, which counts events for BPF programs, like:

  [root@localhost ~]# ~/perf stat -e ref-cycles,cycles -b 254 -I 1000
     1.487903822            115,200      ref-cycles
     1.487903822             86,012      cycles
     2.489147029             80,560      ref-cycles
     2.489147029             73,784      cycles
     3.490341825             60,720      ref-cycles
     3.490341825             37,797      cycles
     4.491540887             37,120      ref-cycles
     4.491540887             31,963      cycles

The example above counts the cycles and ref-cycles of the BPF program
with id 254. This is similar to the bpftool-prog-profile command, but
more flexible.

perf-stat -b creates per-cpu perf_events and attaches fentry/fexit BPF
programs (monitor-progs) to the target BPF program (target-prog). The
monitor-progs read the perf_event value before and after the
target-prog runs, and accumulate the difference in a BPF map. User
space then reads the data from these maps.

A new struct bpf_counter is introduced to provide a common interface
for using BPF programs/maps to count perf events.
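Since the accumulated readings live in a BPF_MAP_TYPE_PERCPU_ARRAY, a
single lookup returns one copy of the value for every possible CPU.
perf itself keeps the per-CPU values (see bpf_program_profiler__read()
below); purely as an illustration, a hypothetical helper that derives
a single total across CPUs from that map would look like this, where
reading_map_fd and num_cpu are assumed to come from the loaded
skeleton:

  #include <linux/bpf.h>	/* struct bpf_perf_event_value */
  #include <bpf/bpf.h>	/* bpf_map_lookup_elem() */

  static int sum_accum_readings(int reading_map_fd, int num_cpu,
  				struct bpf_perf_event_value *total)
  {
  	struct bpf_perf_event_value values[num_cpu];
  	__u32 key = 0;
  	int cpu, err;

  	/* one lookup fills in the entry for every possible CPU */
  	err = bpf_map_lookup_elem(reading_map_fd, &key, values);
  	if (err)
  		return err;

  	total->counter = total->enabled = total->running = 0;
  	for (cpu = 0; cpu < num_cpu; cpu++) {
  		total->counter += values[cpu].counter;
  		total->enabled += values[cpu].enabled;
  		total->running += values[cpu].running;
  	}
  	return 0;
  }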
Signed-off-by: Song Liu <songliubrav...@fb.com>
---
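Note: for testing, the id of a candidate target-prog can be taken from
"bpftool prog list". A minimal standalone equivalent, using only the
bpf syscall wrappers from libbpf (not part of this patch), needs
CAP_SYS_ADMIN:

  #include <stdio.h>
  #include <unistd.h>
  #include <linux/bpf.h>
  #include <bpf/bpf.h>

  /* print "<id> <name>" for every BPF program loaded in the kernel */
  int main(void)
  {
  	__u32 id = 0;

  	while (bpf_prog_get_next_id(id, &id) == 0) {
  		struct bpf_prog_info info = {};
  		__u32 info_len = sizeof(info);
  		int fd = bpf_prog_get_fd_by_id(id);

  		if (fd < 0)
  			continue;
  		if (!bpf_obj_get_info_by_fd(fd, &info, &info_len))
  			printf("%u\t%s\n", id, info.name);
  		close(fd);
  	}
  	return 0;
  }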
 tools/perf/Makefile.perf                        |   2 +-
 tools/perf/builtin-stat.c                       |  59 ++++-
 tools/perf/util/Build                           |   1 +
 tools/perf/util/bpf_counter.c                   | 215 ++++++++++++++++++
 tools/perf/util/bpf_counter.h                   |  71 ++++++
 .../util/bpf_skel/bpf_prog_profiler.bpf.c       |  96 ++++++++
 tools/perf/util/evsel.c                         |  10 +
 tools/perf/util/evsel.h                         |   5 +
 tools/perf/util/target.h                        |   6 +
 9 files changed, 456 insertions(+), 9 deletions(-)
 create mode 100644 tools/perf/util/bpf_counter.c
 create mode 100644 tools/perf/util/bpf_counter.h
 create mode 100644 tools/perf/util/bpf_skel/bpf_prog_profiler.bpf.c
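Note: the end-to-end flow implemented across bpf_counter.c below,
condensed into one function for readability. This is a sketch only:
error handling is dropped, and prog_fd, prog_name, attr and ncpu are
assumed to be set up by the caller.

  #include <linux/perf_event.h>
  #include <sys/syscall.h>
  #include <unistd.h>
  #include <bpf/bpf.h>
  #include <bpf/libbpf.h>
  #include "bpf_skel/bpf_prog_profiler.skel.h"

  static void profile_bpf_prog(int prog_fd, const char *prog_name,
  			       struct perf_event_attr *attr, int ncpu)
  {
  	struct bpf_prog_profiler_bpf *skel;
  	struct bpf_program *prog;
  	int cpu;

  	/* point the fentry/fexit monitor-progs at the target-prog */
  	skel = bpf_prog_profiler_bpf__open();
  	bpf_object__for_each_program(prog, skel->obj)
  		bpf_program__set_attach_target(prog, prog_fd, prog_name);

  	skel->rodata->num_cpu = ncpu;
  	bpf_map__resize(skel->maps.events, ncpu);
  	bpf_prog_profiler_bpf__load(skel);

  	/* one per-cpu perf_event, slotted into the events map */
  	for (cpu = 0; cpu < ncpu; cpu++) {
  		int evt_fd = syscall(__NR_perf_event_open, attr,
  				     -1 /* pid */, cpu, -1 /* group */, 0);

  		bpf_map_update_elem(bpf_map__fd(skel->maps.events),
  				    &cpu, &evt_fd, BPF_ANY);
  	}
  	bpf_prog_profiler_bpf__attach(skel);
  	/* ... read accum_readings, then bpf_prog_profiler_bpf__destroy() */
  }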
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 37b7ffe1db27c..d926f0c35ed46 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -1013,7 +1013,7 @@ python-clean:
 
 SKEL_OUT := $(abspath util/bpf_skel)
 SKEL_TMP_OUT := $(abspath util/bpf_skel/.tmp)
-SKELETONS := $(SKEL_OUT)/dummy.skel.h
+SKELETONS := $(SKEL_OUT)/dummy.skel.h $(SKEL_OUT)/bpf_prog_profiler.skel.h
 
 ifdef BUILD_BPF_SKEL
 CLANG ?= clang
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index f15b2f8aa14d8..d6df04cc24073 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -67,6 +67,7 @@
 #include "util/top.h"
 #include "util/affinity.h"
 #include "util/pfm.h"
+#include "util/bpf_counter.h"
 #include "asm/bug.h"
 
 #include <linux/time64.h>
@@ -409,12 +410,31 @@ static int read_affinity_counters(struct timespec *rs)
 	return 0;
 }
 
+static int read_bpf_map_counters(void)
+{
+	struct evsel *counter;
+	int err;
+
+	evlist__for_each_entry(evsel_list, counter) {
+		err = bpf_counter__read(counter);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
 static void read_counters(struct timespec *rs)
 {
 	struct evsel *counter;
+	int err;
 
-	if (!stat_config.stop_read_counter && (read_affinity_counters(rs) < 0))
-		return;
+	if (!stat_config.stop_read_counter) {
+		err = read_bpf_map_counters();
+		if (err == -EAGAIN)
+			err = read_affinity_counters(rs);
+		if (err < 0)
+			return;
+	}
 
 	evlist__for_each_entry(evsel_list, counter) {
 		if (counter->err)
@@ -496,11 +516,20 @@ static bool handle_interval(unsigned int interval, int *times)
 	return false;
 }
 
-static void enable_counters(void)
+static int enable_counters(void)
 {
+	struct evsel *evsel;
+	int err;
+
+	evlist__for_each_entry(evsel_list, evsel) {
+		err = bpf_counter__enable(evsel);
+		if (err)
+			return err;
+	}
+
 	if (stat_config.initial_delay < 0) {
 		pr_info(EVLIST_DISABLED_MSG);
-		return;
+		return 0;
 	}
 
 	if (stat_config.initial_delay > 0) {
@@ -518,6 +547,7 @@ static void enable_counters(void)
 		if (stat_config.initial_delay > 0)
 			pr_info(EVLIST_ENABLED_MSG);
 	}
+	return 0;
 }
 
 static void disable_counters(void)
@@ -720,7 +750,7 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
 	const bool forks = (argc > 0);
 	bool is_pipe = STAT_RECORD ? perf_stat.data.is_pipe : false;
 	struct affinity affinity;
-	int i, cpu;
+	int i, cpu, err;
 	bool second_pass = false;
 
 	if (forks) {
@@ -738,6 +768,11 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
 	if (affinity__setup(&affinity) < 0)
 		return -1;
 
+	evlist__for_each_entry(evsel_list, counter) {
+		if (bpf_counter__load(counter, &target))
+			return -1;
+	}
+
 	evlist__for_each_cpu (evsel_list, i, cpu) {
 		affinity__set(&affinity, cpu);
 
@@ -851,7 +886,7 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
 	}
 
 	if (STAT_RECORD) {
-		int err, fd = perf_data__fd(&perf_stat.data);
+		int fd = perf_data__fd(&perf_stat.data);
 
 		if (is_pipe) {
 			err = perf_header__write_pipe(perf_data__fd(&perf_stat.data));
@@ -877,7 +912,9 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
 
 	if (forks) {
 		perf_evlist__start_workload(evsel_list);
-		enable_counters();
+		err = enable_counters();
+		if (err)
+			return -1;
 
 		if (interval || timeout || evlist__ctlfd_initialized(evsel_list))
 			status = dispatch_events(forks, timeout, interval, &times);
@@ -896,7 +933,9 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
 		if (WIFSIGNALED(status))
 			psignal(WTERMSIG(status), argv[0]);
 	} else {
-		enable_counters();
+		err = enable_counters();
+		if (err)
+			return -1;
 		status = dispatch_events(forks, timeout, interval, &times);
 	}
 
@@ -1087,6 +1126,10 @@ static struct option stat_options[] = {
 		    "stat events on existing process id"),
 	OPT_STRING('t', "tid", &target.tid, "tid",
 		    "stat events on existing thread id"),
+#ifdef BUILD_BPF_SKEL
+	OPT_STRING('b', "bpf-prog", &target.bpf_prog_id, "bpf-prog-id",
+		    "stat events on existing bpf program id"),
+#endif
 	OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
 		    "system-wide collection from all CPUs"),
 	OPT_BOOLEAN('g', "group", &group,
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index e2563d0154eb6..188521f343470 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -135,6 +135,7 @@ perf-y += clockid.o
 
 perf-$(CONFIG_LIBBPF) += bpf-loader.o
 perf-$(CONFIG_LIBBPF) += bpf_map.o
+perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter.o
 perf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o
 perf-$(CONFIG_LIBELF) += symbol-elf.o
 perf-$(CONFIG_LIBELF) += probe-file.o
diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c
new file mode 100644
index 0000000000000..25456a179af26
--- /dev/null
+++ b/tools/perf/util/bpf_counter.c
@@ -0,0 +1,215 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* Copyright (c) 2019 Facebook */
+
+#include <limits.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <linux/err.h>
+#include <bpf/bpf.h>
+#include <bpf/btf.h>
+#include <bpf/libbpf.h>
+
+#include "bpf_counter.h"
+#include "counts.h"
+#include "debug.h"
+#include "evsel.h"
+#include "target.h"
+
+#include "bpf_skel/bpf_prog_profiler.skel.h"
+
+static inline void *u64_to_ptr(__u64 ptr)
+{
+	return (void *)(unsigned long)ptr;
+}
+
+static char *bpf_target_prog_name(int tgt_fd)
+{
+	struct bpf_prog_info_linear *info_linear;
+	struct bpf_func_info *func_info;
+	const struct btf_type *t;
+	char *name = NULL;
+	struct btf *btf;
+
+	info_linear = bpf_program__get_prog_info_linear(
+		tgt_fd, 1UL << BPF_PROG_INFO_FUNC_INFO);
+	if (IS_ERR_OR_NULL(info_linear)) {
+		pr_debug2("failed to get info_linear for prog FD %d", tgt_fd);
+		return NULL;
+	}
+
+	if (info_linear->info.btf_id == 0 ||
+	    btf__get_from_id(info_linear->info.btf_id, &btf)) {
+		pr_debug2("prog FD %d doesn't have valid btf", tgt_fd);
+		goto out;
+	}
+
+	func_info = u64_to_ptr(info_linear->info.func_info);
+	t = btf__type_by_id(btf, func_info[0].type_id);
+	if (!t) {
+		pr_debug2("btf %d doesn't have type %d",
+			  info_linear->info.btf_id, func_info[0].type_id);
+		goto out;
+	}
+	name = strdup(btf__name_by_offset(btf, t->name_off));
+out:
+	free(info_linear);
+	return name;
+}
+
+static void set_max_rlimit(void)
+{
+	struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
+
+	setrlimit(RLIMIT_MEMLOCK, &rinf);
+}
+
+static int bpf_program_profiler__load(struct evsel *evsel, struct target *target)
+{
+	struct bpf_prog_profiler_bpf *skel;
+	struct bpf_program *prog;
+	char *prog_name;
+	char *end_ptr;
+	u32 prog_id;
+	int prog_fd;
+	int err;
+
+	prog_id = strtoul(target->bpf_prog_id, &end_ptr, 10);
+	if (prog_id == 0 || prog_id == UINT_MAX || *end_ptr != '\0') {
+		pr_debug("Failed to parse bpf prog id %s\n", target->bpf_prog_id);
+		return -1;
+	}
+	prog_fd = bpf_prog_get_fd_by_id(prog_id);
+	if (prog_fd < 0) {
+		pr_debug("Failed to open fd for bpf prog %u\n", prog_id);
+		return -1;
+	}
+
+	skel = bpf_prog_profiler_bpf__open();
+	if (!skel) {
+		pr_debug("Failed to load bpf skeleton\n");
+		return -1;
+	}
+	skel->rodata->num_cpu = evsel__nr_cpus(evsel);
+
+	bpf_map__resize(skel->maps.events, evsel__nr_cpus(evsel));
+	bpf_map__resize(skel->maps.fentry_readings, 1);
+	bpf_map__resize(skel->maps.accum_readings, 1);
+
+	prog_name = bpf_target_prog_name(prog_fd);
+
+	bpf_object__for_each_program(prog, skel->obj) {
+		err = bpf_program__set_attach_target(prog, prog_fd, prog_name);
+		if (err)
+			pr_debug("bpf_program__set_attach_target failed\n");
+	}
+	set_max_rlimit();
+	err = bpf_prog_profiler_bpf__load(skel);
+	if (err)
+		pr_debug("bpf_prog_profiler_bpf__load failed\n");
+
+	evsel->bpf_counter.skel = skel;
+	return 0;
+}
+
+static int bpf_program_profiler__enable(struct evsel *evsel)
+{
+	struct bpf_prog_profiler_bpf *skel = evsel->bpf_counter.skel;
+
+	return bpf_prog_profiler_bpf__attach(skel);
+}
+
+static int bpf_program_profiler__read(struct evsel *evsel)
+{
+	struct bpf_prog_profiler_bpf *skel = evsel->bpf_counter.skel;
+	int num_cpu = evsel__nr_cpus(evsel);
+	struct bpf_perf_event_value values[num_cpu];
+	int reading_map_fd;
+	__u32 key = 0;
+	int err, cpu;
+
+	if (!skel)
+		return -EAGAIN;
+
+	reading_map_fd = bpf_map__fd(skel->maps.accum_readings);
+
+	err = bpf_map_lookup_elem(reading_map_fd, &key, values);
+	if (err) {
+		fprintf(stderr, "failed to read value\n");
+		return err;
+	}
+
+	for (cpu = 0; cpu < num_cpu; cpu++) {
+		perf_counts(evsel->counts, cpu, 0)->val = values[cpu].counter;
+		perf_counts(evsel->counts, cpu, 0)->ena = values[cpu].enabled;
+		perf_counts(evsel->counts, cpu, 0)->run = values[cpu].running;
+	}
+
+	return 0;
+}
+
+static int bpf_program_profiler__destroy(struct evsel *evsel)
+{
+	struct bpf_prog_profiler_bpf *skel = evsel->bpf_counter.skel;
+
+	bpf_prog_profiler_bpf__destroy(skel);
+	return 0;
+}
+
+static int bpf_program_profiler__install_pe(struct evsel *evsel, int cpu,
+					    int fd)
+{
+	struct bpf_prog_profiler_bpf *skel = evsel->bpf_counter.skel;
+
+	return bpf_map_update_elem(bpf_map__fd(skel->maps.events),
+				   &cpu, &fd, BPF_ANY);
+}
+
+struct bpf_counter_ops bpf_program_profiler_ops = {
+	.load = bpf_program_profiler__load,
+	.enable = bpf_program_profiler__enable,
+	.read = bpf_program_profiler__read,
+	.destroy = bpf_program_profiler__destroy,
+	.install_pe = bpf_program_profiler__install_pe,
+};
+
+int bpf_counter__install_pe(struct evsel *evsel, int cpu, int fd)
+{
+	if (!evsel->bpf_counter.skel)
+		return 0;
+	return evsel->bpf_counter.ops->install_pe(evsel, cpu, fd);
+}
+
+int bpf_counter__load(struct evsel *evsel, struct target *target)
+{
+	if (target__has_bpf(target))
+		evsel->bpf_counter.ops = &bpf_program_profiler_ops;
+
+	if (evsel->bpf_counter.ops)
+		return evsel->bpf_counter.ops->load(evsel, target);
+	return 0;
+}
+
+int bpf_counter__enable(struct evsel *evsel)
+{
+	if (!evsel->bpf_counter.skel)
+		return 0;
+	return evsel->bpf_counter.ops->enable(evsel);
+}
+
+int bpf_counter__read(struct evsel *evsel)
+{
+	if (!evsel->bpf_counter.skel)
+		return -EAGAIN;
+	return evsel->bpf_counter.ops->read(evsel);
+}
+
+int bpf_counter__destroy(struct evsel *evsel)
+{
+	if (!evsel->bpf_counter.skel)
+		return 0;
+	evsel->bpf_counter.ops->destroy(evsel);
+	evsel->bpf_counter.skel = NULL;
+	evsel->bpf_counter.ops = NULL;
+	return 0;
+}
diff --git a/tools/perf/util/bpf_counter.h b/tools/perf/util/bpf_counter.h
new file mode 100644
index 0000000000000..9f8f9bd3ec6e2
--- /dev/null
+++ b/tools/perf/util/bpf_counter.h
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_BPF_COUNTER_H
+#define __PERF_BPF_COUNTER_H 1
+
+struct evsel;
+struct target;
+struct bpf_counter;
+
+typedef int (*bpf_counter_evsel_op)(struct evsel *evsel);
+typedef int (*bpf_counter_evsel_target_op)(struct evsel *evsel,
+					   struct target *target);
+typedef int (*bpf_counter_evsel_install_pe_op)(struct evsel *evsel,
+					       int cpu,
+					       int fd);
+
+struct bpf_counter_ops {
+	bpf_counter_evsel_target_op load;
+	bpf_counter_evsel_op enable;
+	bpf_counter_evsel_op read;
+	bpf_counter_evsel_op destroy;
+	bpf_counter_evsel_install_pe_op install_pe;
+};
+
+struct bpf_counter {
+	void *skel;
+	struct bpf_counter_ops *ops;
+};
+
+#ifdef BUILD_BPF_SKEL
+
+int bpf_counter__load(struct evsel *evsel, struct target *target);
+int bpf_counter__enable(struct evsel *evsel);
+int bpf_counter__read(struct evsel *evsel);
+int bpf_counter__destroy(struct evsel *evsel);
+int bpf_counter__install_pe(struct evsel *evsel, int cpu, int fd);
+
+#else
+
+#include <linux/err.h>
+
+static inline int bpf_counter__load(struct evsel *evsel __maybe_unused,
+				    struct target *target __maybe_unused)
+{
+	return 0;
+}
+
+static inline int bpf_counter__enable(struct evsel *evsel __maybe_unused)
+{
+	return 0;
+}
+
+static inline int bpf_counter__read(struct evsel *evsel __maybe_unused)
+{
+	return -EAGAIN;
+}
+
+static inline int bpf_counter__destroy(struct evsel *evsel __maybe_unused)
+{
+	return 0;
+}
+
+static inline int bpf_counter__install_pe(struct evsel *evsel __maybe_unused,
+					  int cpu __maybe_unused,
+					  int fd __maybe_unused)
+{
+	return 0;
+}
+
+#endif /* BUILD_BPF_SKEL */
+
+#endif /* __PERF_BPF_COUNTER_H */
diff --git a/tools/perf/util/bpf_skel/bpf_prog_profiler.bpf.c b/tools/perf/util/bpf_skel/bpf_prog_profiler.bpf.c
new file mode 100644
index 0000000000000..cdde2218af86a
--- /dev/null
+++ b/tools/perf/util/bpf_skel/bpf_prog_profiler.bpf.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+// Copyright (c) 2020 Facebook
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+/* map of perf event fds, num_cpu * num_metric entries */
+struct {
+	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(int));
+} events SEC(".maps");
+
+/* readings at fentry */
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(struct bpf_perf_event_value));
+	__uint(max_entries, 1);
+} fentry_readings SEC(".maps");
+
+/* accumulated readings */
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(struct bpf_perf_event_value));
+	__uint(max_entries, 1);
+} accum_readings SEC(".maps");
+
+const volatile __u32 num_cpu = 1;
+
+SEC("fentry/XXX")
+int BPF_PROG(fentry_XXX)
+{
+	__u32 key = bpf_get_smp_processor_id();
+	struct bpf_perf_event_value reading;
+	struct bpf_perf_event_value *ptr;
+	__u32 zero = 0;
+	long err;
+
+	/* look up before reading, to reduce error */
+	ptr = bpf_map_lookup_elem(&fentry_readings, &zero);
+	if (!ptr)
+		return 0;
+
+	err = bpf_perf_event_read_value(&events, key, &reading,
+					sizeof(reading));
+	if (err)
+		return 0;
+
+	*ptr = reading;
+	return 0;
+}
+
+static inline void
+fexit_update_maps(struct bpf_perf_event_value *after)
+{
+	struct bpf_perf_event_value *before, diff;
+	__u32 zero = 0;
+
+	before = bpf_map_lookup_elem(&fentry_readings, &zero);
+	/* only account samples with a valid fentry_reading */
+	if (before && before->counter) {
+		struct bpf_perf_event_value *accum;
+
+		diff.counter = after->counter - before->counter;
+		diff.enabled = after->enabled - before->enabled;
+		diff.running = after->running - before->running;
+
+		accum = bpf_map_lookup_elem(&accum_readings, &zero);
+		if (accum) {
+			accum->counter += diff.counter;
+			accum->enabled += diff.enabled;
+			accum->running += diff.running;
+		}
+	}
+}
+
+SEC("fexit/XXX")
+int BPF_PROG(fexit_XXX)
+{
+	struct bpf_perf_event_value reading;
+	__u32 cpu = bpf_get_smp_processor_id();
+	__u32 one = 1, zero = 0;
+	int err;
+
+	/* read all events before updating the maps, to reduce error */
+	err = bpf_perf_event_read_value(&events, cpu, &reading, sizeof(reading));
+	if (err)
+		return 0;
+
+	fexit_update_maps(&reading);
+	return 0;
+}
+
+char LICENSE[] SEC("license") = "Dual BSD/GPL";
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 1cad6051d8b08..6376b8db58e09 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -25,6 +25,7 @@
 #include <stdlib.h>
 #include <perf/evsel.h>
 #include "asm/bug.h"
+#include "bpf_counter.h"
 #include "callchain.h"
 #include "cgroup.h"
 #include "counts.h"
@@ -51,6 +52,10 @@
 #include <internal/lib.h>
 
 #include <linux/ctype.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+#include <bpf/btf.h>
+#include "rlimit.h"
 
 struct perf_missing_features perf_missing_features;
 
@@ -1365,6 +1370,7 @@ void evsel__exit(struct evsel *evsel)
 {
 	assert(list_empty(&evsel->core.node));
 	assert(evsel->evlist == NULL);
+	bpf_counter__destroy(evsel);
 	evsel__free_counts(evsel);
 	perf_evsel__free_fd(&evsel->core);
 	perf_evsel__free_id(&evsel->core);
@@ -1770,6 +1776,8 @@ static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus,
 		evsel->core.attr.sample_id_all = 0;
 
 	display_attr(&evsel->core.attr);
+	if (evsel->bpf_counter.skel)
+		evsel->core.attr.inherit = 0;
 
 	for (cpu = start_cpu; cpu < end_cpu; cpu++) {
 
@@ -1788,6 +1796,8 @@ static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus,
 
 			FD(evsel, cpu, thread) = fd;
 
+			bpf_counter__install_pe(evsel, cpu, fd);
+
 			if (unlikely(test_attr__enabled)) {
 				test_attr__open(&evsel->core.attr, pid, cpus->map[cpu],
 						fd, group_fd, flags);
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 79a860d8e3eef..3a44f7b25726c 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -10,6 +10,7 @@
 #include <internal/evsel.h>
 #include <perf/evsel.h>
 #include "symbol_conf.h"
+#include "bpf_counter.h"
 #include <internal/cpumap.h>
 
 struct bpf_object;
@@ -17,6 +18,8 @@
 struct cgroup;
 struct perf_counts;
 struct perf_stat_evsel;
 union perf_event;
+struct bpf_counter;
+struct target;
 
 typedef int (evsel__sb_cb_t)(union perf_event *event, void *data);
 
@@ -127,6 +130,7 @@ struct evsel {
 	 * See also evsel__has_callchain().
 	 */
 	__u64			synth_sample_type;
+	struct bpf_counter	bpf_counter;
 };
 
 struct perf_missing_features {
@@ -423,4 +427,5 @@ static inline bool evsel__is_dummy_event(struct evsel *evsel)
 struct perf_env *evsel__env(struct evsel *evsel);
 
 int evsel__store_ids(struct evsel *evsel, struct evlist *evlist);
+
 #endif /* __PERF_EVSEL_H */
diff --git a/tools/perf/util/target.h b/tools/perf/util/target.h
index 6ef01a83b24e9..cdaa7510f918b 100644
--- a/tools/perf/util/target.h
+++ b/tools/perf/util/target.h
@@ -10,6 +10,7 @@ struct target {
 	const char *tid;
 	const char *cpu_list;
 	const char *uid_str;
+	const char *bpf_prog_id;
 	uid_t	     uid;
 	bool	     system_wide;
 	bool	     uses_mmap;
@@ -59,6 +60,11 @@ static inline bool target__has_cpu(struct target *target)
 	return target->system_wide || target->cpu_list;
 }
 
+static inline bool target__has_bpf(struct target *target)
+{
+	return target->bpf_prog_id;
+}
+
 static inline bool target__none(struct target *target)
 {
 	return !target__has_task(target) && !target__has_cpu(target);
-- 
2.24.1