On Tue, Nov 10, 2020 at 11:10:46AM +0100, Jiri Olsa wrote: > On Tue, Nov 10, 2020 at 09:28:51AM +0100, Peter Zijlstra wrote: > > On Mon, Nov 09, 2020 at 10:53:54PM +0100, Jiri Olsa wrote: > > > There's new misc bit for mmap2 to signal there's build > > > id data in it: > > > > > > #define PERF_RECORD_MISC_BUILD_ID (1 << 14) > > > > PERF_RECORD_MISC_MMAP_BUILD_ID would be consistent with the existing > > PERF_RECORD_MISC_MMAP_DATA naming. > > ok > > > > > Also, AFAICT there's still a bunch of unused bits in misc. > > > > 012 CDEF > > |||---------|||| > > > > Where: > > 0-2 CPUMODE_MASK > > > > C PROC_MAP_PARSE_TIMEOUT > > D MMAP_DATA / COMM_EXEC / FORK_EXEC / SWITCH_OUT > > E EXACT_IP / SCHED_OUT_PREEMPT > > F (reserved) > > > > Maybe we should put in a comment to keep track of the hole ? > > ook
how about the change below.. I also switch the build_id with the size, but I kept the build_id size 20, because I think there's bigger chance we will use those reserved bytes for something, than that we will need those extra 3 bytes in build_id array struct { u8 build_id_size; u8 __reserved_1; u16 __reserved_2; u8 build_id[20]; }; jirka --- diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index b95d3c485d27..45a216bea048 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -384,7 +384,8 @@ struct perf_event_attr { aux_output : 1, /* generate AUX records instead of events */ cgroup : 1, /* include cgroup events */ text_poke : 1, /* include text poke events */ - __reserved_1 : 30; + build_id : 1, /* use build id in mmap2 events */ + __reserved_1 : 29; union { __u32 wakeup_events; /* wakeup every n events */ @@ -657,6 +658,22 @@ struct perf_event_mmap_page { __u64 aux_size; }; +/* + * The current state of perf_event_header::misc bits usage: + * ('|' used bit, '-' unused bit) + * + * 012 CDEF + * |||---------|||| + * + * Where: + * 0-2 CPUMODE_MASK + * + * C PROC_MAP_PARSE_TIMEOUT + * D MMAP_DATA / COMM_EXEC / FORK_EXEC / SWITCH_OUT + * E MMAP_BUILD_ID / EXACT_IP / SCHED_OUT_PREEMPT + * F (reserved) + */ + #define PERF_RECORD_MISC_CPUMODE_MASK (7 << 0) #define PERF_RECORD_MISC_CPUMODE_UNKNOWN (0 << 0) #define PERF_RECORD_MISC_KERNEL (1 << 0) @@ -688,6 +705,7 @@ struct perf_event_mmap_page { * * PERF_RECORD_MISC_EXACT_IP - PERF_RECORD_SAMPLE of precise events * PERF_RECORD_MISC_SWITCH_OUT_PREEMPT - PERF_RECORD_SWITCH* events + * PERF_RECORD_MISC_MMAP_BUILD_ID - PERF_RECORD_MMAP2 event * * * PERF_RECORD_MISC_EXACT_IP: @@ -697,9 +715,13 @@ struct perf_event_mmap_page { * * PERF_RECORD_MISC_SWITCH_OUT_PREEMPT: * Indicates that thread was preempted in TASK_RUNNING state. + * + * PERF_RECORD_MISC_MMAP_BUILD_ID: + * Indicates that mmap2 event carries build id data. */ #define PERF_RECORD_MISC_EXACT_IP (1 << 14) #define PERF_RECORD_MISC_SWITCH_OUT_PREEMPT (1 << 14) +#define PERF_RECORD_MISC_MMAP_BUILD_ID (1 << 14) /* * Reserve the last bit to indicate some extended misc field */ @@ -911,10 +933,20 @@ enum perf_event_type { * u64 addr; * u64 len; * u64 pgoff; - * u32 maj; - * u32 min; - * u64 ino; - * u64 ino_generation; + * union { + * struct { + * u32 maj; + * u32 min; + * u64 ino; + * u64 ino_generation; + * }; + * struct { + * u8 build_id_size; + * u8 __reserved_1; + * u16 __reserved_2; + * u8 build_id[20]; + * }; + * }; * u32 prot, flags; * char filename[]; * struct sample_id sample_id; diff --git a/kernel/events/core.c b/kernel/events/core.c index da467e1dd49a..5b2b8ec82399 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -51,6 +51,7 @@ #include <linux/proc_ns.h> #include <linux/mount.h> #include <linux/min_heap.h> +#include <linux/buildid.h> #include "internal.h" @@ -395,6 +396,7 @@ static atomic_t nr_ksymbol_events __read_mostly; static atomic_t nr_bpf_events __read_mostly; static atomic_t nr_cgroup_events __read_mostly; static atomic_t nr_text_poke_events __read_mostly; +static atomic_t nr_build_id_events __read_mostly; static LIST_HEAD(pmus); static DEFINE_MUTEX(pmus_lock); @@ -4672,6 +4674,8 @@ static void unaccount_event(struct perf_event *event) dec = true; if (event->attr.mmap || event->attr.mmap_data) atomic_dec(&nr_mmap_events); + if (event->attr.build_id) + atomic_dec(&nr_build_id_events); if (event->attr.comm) atomic_dec(&nr_comm_events); if (event->attr.namespaces) @@ -7942,6 +7946,8 @@ struct perf_mmap_event { u64 ino; u64 ino_generation; u32 prot, flags; + u8 build_id[BUILD_ID_SIZE]; + u32 build_id_size; struct { struct perf_event_header header; @@ -7997,13 +8003,23 @@ static void perf_event_mmap_output(struct perf_event *event, mmap_event->event_id.pid = perf_event_pid(event, current); mmap_event->event_id.tid = perf_event_tid(event, current); + if (event->attr.mmap2 && event->attr.build_id) + mmap_event->event_id.header.misc |= PERF_RECORD_MISC_MMAP_BUILD_ID; + perf_output_put(&handle, mmap_event->event_id); if (event->attr.mmap2) { - perf_output_put(&handle, mmap_event->maj); - perf_output_put(&handle, mmap_event->min); - perf_output_put(&handle, mmap_event->ino); - perf_output_put(&handle, mmap_event->ino_generation); + if (event->attr.build_id) { + u8 size[4] = { (u8) mmap_event->build_id_size, 0, 0, 0 }; + + __output_copy(&handle, size, 4); + __output_copy(&handle, mmap_event->build_id, BUILD_ID_SIZE); + } else { + perf_output_put(&handle, mmap_event->maj); + perf_output_put(&handle, mmap_event->min); + perf_output_put(&handle, mmap_event->ino); + perf_output_put(&handle, mmap_event->ino_generation); + } perf_output_put(&handle, mmap_event->prot); perf_output_put(&handle, mmap_event->flags); } @@ -8132,6 +8148,9 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size; + if (atomic_read(&nr_build_id_events)) + build_id_parse(vma, mmap_event->build_id, &mmap_event->build_id_size); + perf_iterate_sb(perf_event_mmap_output, mmap_event, NULL); @@ -11069,6 +11088,8 @@ static void account_event(struct perf_event *event) inc = true; if (event->attr.mmap || event->attr.mmap_data) atomic_inc(&nr_mmap_events); + if (event->attr.build_id) + atomic_inc(&nr_build_id_events); if (event->attr.comm) atomic_inc(&nr_comm_events); if (event->attr.namespaces) diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 768888b9326a..1bcf51e24979 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -482,6 +482,9 @@ Specify vmlinux path which has debuginfo. --buildid-all:: Record build-id of all DSOs regardless whether it's actually hit or not. +--buildid-mmap:: +Record build ids in mmap2 events, disables build id cache (implies --no-buildid). + --aio[=n]:: Use <n> control blocks in asynchronous (Posix AIO) trace writing mode (default: 1, max: 4). Asynchronous mode is supported only when linking Perf tool with libc library diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index adf311d15d3d..47bae9d82d43 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -102,6 +102,7 @@ struct record { bool no_buildid_cache; bool no_buildid_cache_set; bool buildid_all; + bool buildid_mmap; bool timestamp_filename; bool timestamp_boundary; struct switch_output switch_output; @@ -2139,6 +2140,8 @@ static int perf_record_config(const char *var, const char *value, void *cb) rec->no_buildid_cache = true; else if (!strcmp(value, "skip")) rec->no_buildid = true; + else if (!strcmp(value, "mmap")) + rec->buildid_mmap = true; else return -1; return 0; @@ -2554,6 +2557,8 @@ static struct option __record_options[] = { "file", "vmlinux pathname"), OPT_BOOLEAN(0, "buildid-all", &record.buildid_all, "Record build-id of all DSOs regardless of hits"), + OPT_BOOLEAN(0, "buildid-mmap", &record.buildid_mmap, + "Record build-id in map events"), OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename, "append timestamp to output filename"), OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary, @@ -2657,6 +2662,21 @@ int cmd_record(int argc, const char **argv) } + if (rec->buildid_mmap) { + if (!perf_can_record_build_id()) { + pr_err("Failed: no support to record build id in mmap events, update your kernel.\n"); + err = -EINVAL; + goto out_opts; + } + pr_debug("Enabling build id in mmap2 events.\n"); + /* Enable mmap build id synthesizing. */ + symbol_conf.buildid_mmap2 = true; + /* Enable perf_event_attr::build_id bit. */ + rec->opts.build_id = true; + /* Disable build id cache. */ + rec->no_buildid = true; + } + if (rec->opts.kcore) rec->data.is_dir = true; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 1cad6051d8b0..749d806ee1d1 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1170,10 +1170,12 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts, if (opts->sample_weight) evsel__set_sample_bit(evsel, WEIGHT); - attr->task = track; - attr->mmap = track; - attr->mmap2 = track && !perf_missing_features.mmap2; - attr->comm = track; + attr->task = track; + attr->mmap = track; + attr->mmap2 = track && !perf_missing_features.mmap2; + attr->comm = track; + attr->build_id = track && opts->build_id; + /* * ksymbol is tracked separately with text poke because it needs to be * system wide and enabled immediately. diff --git a/tools/perf/util/perf_api_probe.c b/tools/perf/util/perf_api_probe.c index 3840d02f0f7b..829af17a0867 100644 --- a/tools/perf/util/perf_api_probe.c +++ b/tools/perf/util/perf_api_probe.c @@ -98,6 +98,11 @@ static void perf_probe_text_poke(struct evsel *evsel) evsel->core.attr.text_poke = 1; } +static void perf_probe_build_id(struct evsel *evsel) +{ + evsel->core.attr.build_id = 1; +} + bool perf_can_sample_identifier(void) { return perf_probe_api(perf_probe_sample_identifier); @@ -172,3 +177,8 @@ bool perf_can_aux_sample(void) return true; } + +bool perf_can_record_build_id(void) +{ + return perf_probe_api(perf_probe_build_id); +} diff --git a/tools/perf/util/perf_api_probe.h b/tools/perf/util/perf_api_probe.h index d5506a983a94..f12ca55f509a 100644 --- a/tools/perf/util/perf_api_probe.h +++ b/tools/perf/util/perf_api_probe.h @@ -11,5 +11,6 @@ bool perf_can_record_cpu_wide(void); bool perf_can_record_switch_events(void); bool perf_can_record_text_poke_events(void); bool perf_can_sample_identifier(void); +bool perf_can_record_build_id(void); #endif // __PERF_API_PROBE_H diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c index e67a227c0ce7..0f1c62d40a89 100644 --- a/tools/perf/util/perf_event_attr_fprintf.c +++ b/tools/perf/util/perf_event_attr_fprintf.c @@ -134,6 +134,7 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, PRINT_ATTRf(bpf_event, p_unsigned); PRINT_ATTRf(aux_output, p_unsigned); PRINT_ATTRf(cgroup, p_unsigned); + PRINT_ATTRf(build_id, p_unsigned); PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned); PRINT_ATTRf(bp_type, p_unsigned); diff --git a/tools/perf/util/record.h b/tools/perf/util/record.h index 266760ac9143..609e706f4282 100644 --- a/tools/perf/util/record.h +++ b/tools/perf/util/record.h @@ -49,6 +49,7 @@ struct record_opts { bool no_bpf_event; bool kcore; bool text_poke; + bool build_id; unsigned int freq; unsigned int mmap_pages; unsigned int auxtrace_mmap_pages;