On Tue, Nov 10, 2020 at 11:10:46AM +0100, Jiri Olsa wrote:
> On Tue, Nov 10, 2020 at 09:28:51AM +0100, Peter Zijlstra wrote:
> > On Mon, Nov 09, 2020 at 10:53:54PM +0100, Jiri Olsa wrote:
> > > There's new misc bit for mmap2 to signal there's build
> > > id data in it:
> > > 
> > >   #define PERF_RECORD_MISC_BUILD_ID              (1 << 14)
> > 
> > PERF_RECORD_MISC_MMAP_BUILD_ID would be consistent with the existing
> > PERF_RECORD_MISC_MMAP_DATA naming.
> 
> ok
> 
> > 
> > Also, AFAICT there's still a bunch of unused bits in misc.
> > 
> >     012         CDEF
> >     |||---------||||
> > 
> > Where:
> >     0-2     CPUMODE_MASK
> > 
> >     C       PROC_MAP_PARSE_TIMEOUT
> >     D       MMAP_DATA / COMM_EXEC / FORK_EXEC / SWITCH_OUT
> >     E       EXACT_IP / SCHED_OUT_PREEMPT
> >     F       (reserved)
> > 
> > Maybe we should put in a comment to keep track of the hole ?
> 
> ook

How about the change below? I also swapped the order of build_id and
its size, but I kept the build_id array at 20 bytes, because I think
there's a bigger chance that we will use those reserved bytes for
something than that we will need the extra 3 bytes in the build_id array.

  struct {
          u8              build_id_size;
          u8              __reserved_1;
          u16             __reserved_2;
          u8              build_id[20];
  };

jirka


---
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index b95d3c485d27..45a216bea048 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -384,7 +384,8 @@ struct perf_event_attr {
                                aux_output     :  1, /* generate AUX records 
instead of events */
                                cgroup         :  1, /* include cgroup events */
                                text_poke      :  1, /* include text poke 
events */
-                               __reserved_1   : 30;
+                               build_id       :  1, /* use build id in mmap2 
events */
+                               __reserved_1   : 29;
 
        union {
                __u32           wakeup_events;    /* wakeup every n events */
@@ -657,6 +658,22 @@ struct perf_event_mmap_page {
        __u64   aux_size;
 };
 
+/*
+ * The current state of perf_event_header::misc bits usage:
+ * ('|' used bit, '-' unused bit)
+ *
+ *  012         CDEF
+ *  |||---------||||
+ *
+ *  Where:
+ *    0-2     CPUMODE_MASK
+ *
+ *    C       PROC_MAP_PARSE_TIMEOUT
+ *    D       MMAP_DATA / COMM_EXEC / FORK_EXEC / SWITCH_OUT
+ *    E       MMAP_BUILD_ID / EXACT_IP / SCHED_OUT_PREEMPT
+ *    F       (reserved)
+ */
+
 #define PERF_RECORD_MISC_CPUMODE_MASK          (7 << 0)
 #define PERF_RECORD_MISC_CPUMODE_UNKNOWN       (0 << 0)
 #define PERF_RECORD_MISC_KERNEL                        (1 << 0)
@@ -688,6 +705,7 @@ struct perf_event_mmap_page {
  *
  *   PERF_RECORD_MISC_EXACT_IP           - PERF_RECORD_SAMPLE of precise events
  *   PERF_RECORD_MISC_SWITCH_OUT_PREEMPT - PERF_RECORD_SWITCH* events
+ *   PERF_RECORD_MISC_MMAP_BUILD_ID      - PERF_RECORD_MMAP2 event
  *
  *
  * PERF_RECORD_MISC_EXACT_IP:
@@ -697,9 +715,13 @@ struct perf_event_mmap_page {
  *
  * PERF_RECORD_MISC_SWITCH_OUT_PREEMPT:
  *   Indicates that thread was preempted in TASK_RUNNING state.
+ *
+ * PERF_RECORD_MISC_MMAP_BUILD_ID:
+ *   Indicates that mmap2 event carries build id data.
  */
 #define PERF_RECORD_MISC_EXACT_IP              (1 << 14)
 #define PERF_RECORD_MISC_SWITCH_OUT_PREEMPT    (1 << 14)
+#define PERF_RECORD_MISC_MMAP_BUILD_ID         (1 << 14)
 /*
  * Reserve the last bit to indicate some extended misc field
  */
@@ -911,10 +933,20 @@ enum perf_event_type {
         *      u64                             addr;
         *      u64                             len;
         *      u64                             pgoff;
-        *      u32                             maj;
-        *      u32                             min;
-        *      u64                             ino;
-        *      u64                             ino_generation;
+        *      union {
+        *              struct {
+        *                      u32             maj;
+        *                      u32             min;
+        *                      u64             ino;
+        *                      u64             ino_generation;
+        *              };
+        *              struct {
+        *                      u8              build_id_size;
+        *                      u8              __reserved_1;
+        *                      u16             __reserved_2;
+        *                      u8              build_id[20];
+        *              };
+        *      };
         *      u32                             prot, flags;
         *      char                            filename[];
         *      struct sample_id                sample_id;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index da467e1dd49a..5b2b8ec82399 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -51,6 +51,7 @@
 #include <linux/proc_ns.h>
 #include <linux/mount.h>
 #include <linux/min_heap.h>
+#include <linux/buildid.h>
 
 #include "internal.h"
 
@@ -395,6 +396,7 @@ static atomic_t nr_ksymbol_events __read_mostly;
 static atomic_t nr_bpf_events __read_mostly;
 static atomic_t nr_cgroup_events __read_mostly;
 static atomic_t nr_text_poke_events __read_mostly;
+static atomic_t nr_build_id_events __read_mostly;
 
 static LIST_HEAD(pmus);
 static DEFINE_MUTEX(pmus_lock);
@@ -4672,6 +4674,8 @@ static void unaccount_event(struct perf_event *event)
                dec = true;
        if (event->attr.mmap || event->attr.mmap_data)
                atomic_dec(&nr_mmap_events);
+       if (event->attr.build_id)
+               atomic_dec(&nr_build_id_events);
        if (event->attr.comm)
                atomic_dec(&nr_comm_events);
        if (event->attr.namespaces)
@@ -7942,6 +7946,8 @@ struct perf_mmap_event {
        u64                     ino;
        u64                     ino_generation;
        u32                     prot, flags;
+       u8                      build_id[BUILD_ID_SIZE];
+       u32                     build_id_size;
 
        struct {
                struct perf_event_header        header;
@@ -7997,13 +8003,23 @@ static void perf_event_mmap_output(struct perf_event 
*event,
        mmap_event->event_id.pid = perf_event_pid(event, current);
        mmap_event->event_id.tid = perf_event_tid(event, current);
 
+       if (event->attr.mmap2 && event->attr.build_id)
+               mmap_event->event_id.header.misc |= 
PERF_RECORD_MISC_MMAP_BUILD_ID;
+
        perf_output_put(&handle, mmap_event->event_id);
 
        if (event->attr.mmap2) {
-               perf_output_put(&handle, mmap_event->maj);
-               perf_output_put(&handle, mmap_event->min);
-               perf_output_put(&handle, mmap_event->ino);
-               perf_output_put(&handle, mmap_event->ino_generation);
+               if (event->attr.build_id) {
+                       u8 size[4] = { (u8) mmap_event->build_id_size, 0, 0, 0 
};
+
+                       __output_copy(&handle, size, 4);
+                       __output_copy(&handle, mmap_event->build_id, 
BUILD_ID_SIZE);
+               } else {
+                       perf_output_put(&handle, mmap_event->maj);
+                       perf_output_put(&handle, mmap_event->min);
+                       perf_output_put(&handle, mmap_event->ino);
+                       perf_output_put(&handle, mmap_event->ino_generation);
+               }
                perf_output_put(&handle, mmap_event->prot);
                perf_output_put(&handle, mmap_event->flags);
        }
@@ -8132,6 +8148,9 @@ static void perf_event_mmap_event(struct perf_mmap_event 
*mmap_event)
 
        mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size;
 
+       if (atomic_read(&nr_build_id_events))
+               build_id_parse(vma, mmap_event->build_id, 
&mmap_event->build_id_size);
+
        perf_iterate_sb(perf_event_mmap_output,
                       mmap_event,
                       NULL);
@@ -11069,6 +11088,8 @@ static void account_event(struct perf_event *event)
                inc = true;
        if (event->attr.mmap || event->attr.mmap_data)
                atomic_inc(&nr_mmap_events);
+       if (event->attr.build_id)
+               atomic_inc(&nr_build_id_events);
        if (event->attr.comm)
                atomic_inc(&nr_comm_events);
        if (event->attr.namespaces)
diff --git a/tools/perf/Documentation/perf-record.txt 
b/tools/perf/Documentation/perf-record.txt
index 768888b9326a..1bcf51e24979 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -482,6 +482,9 @@ Specify vmlinux path which has debuginfo.
 --buildid-all::
 Record build-id of all DSOs regardless whether it's actually hit or not.
 
+--buildid-mmap::
+Record build ids in mmap2 events, disables build id cache (implies --no-buildid).
+
 --aio[=n]::
 Use <n> control blocks in asynchronous (Posix AIO) trace writing mode (default: 1, max: 4).
 Asynchronous mode is supported only when linking Perf tool with libc library
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index adf311d15d3d..47bae9d82d43 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -102,6 +102,7 @@ struct record {
        bool                    no_buildid_cache;
        bool                    no_buildid_cache_set;
        bool                    buildid_all;
+       bool                    buildid_mmap;
        bool                    timestamp_filename;
        bool                    timestamp_boundary;
        struct switch_output    switch_output;
@@ -2139,6 +2140,8 @@ static int perf_record_config(const char *var, const char 
*value, void *cb)
                        rec->no_buildid_cache = true;
                else if (!strcmp(value, "skip"))
                        rec->no_buildid = true;
+               else if (!strcmp(value, "mmap"))
+                       rec->buildid_mmap = true;
                else
                        return -1;
                return 0;
@@ -2554,6 +2557,8 @@ static struct option __record_options[] = {
                   "file", "vmlinux pathname"),
        OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
                    "Record build-id of all DSOs regardless of hits"),
+       OPT_BOOLEAN(0, "buildid-mmap", &record.buildid_mmap,
+                   "Record build-id in map events"),
        OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
                    "append timestamp to output filename"),
        OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
@@ -2657,6 +2662,21 @@ int cmd_record(int argc, const char **argv)
 
        }
 
+       if (rec->buildid_mmap) {
+               if (!perf_can_record_build_id()) {
+                       pr_err("Failed: no support to record build id in mmap 
events, update your kernel.\n");
+                       err = -EINVAL;
+                       goto out_opts;
+               }
+               pr_debug("Enabling build id in mmap2 events.\n");
+               /* Enable mmap build id synthesizing. */
+               symbol_conf.buildid_mmap2 = true;
+               /* Enable perf_event_attr::build_id bit. */
+               rec->opts.build_id = true;
+               /* Disable build id cache. */
+               rec->no_buildid = true;
+       }
+
        if (rec->opts.kcore)
                rec->data.is_dir = true;
 
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 1cad6051d8b0..749d806ee1d1 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1170,10 +1170,12 @@ void evsel__config(struct evsel *evsel, struct 
record_opts *opts,
        if (opts->sample_weight)
                evsel__set_sample_bit(evsel, WEIGHT);
 
-       attr->task  = track;
-       attr->mmap  = track;
-       attr->mmap2 = track && !perf_missing_features.mmap2;
-       attr->comm  = track;
+       attr->task     = track;
+       attr->mmap     = track;
+       attr->mmap2    = track && !perf_missing_features.mmap2;
+       attr->comm     = track;
+       attr->build_id = track && opts->build_id;
+
        /*
         * ksymbol is tracked separately with text poke because it needs to be
         * system wide and enabled immediately.
diff --git a/tools/perf/util/perf_api_probe.c b/tools/perf/util/perf_api_probe.c
index 3840d02f0f7b..829af17a0867 100644
--- a/tools/perf/util/perf_api_probe.c
+++ b/tools/perf/util/perf_api_probe.c
@@ -98,6 +98,11 @@ static void perf_probe_text_poke(struct evsel *evsel)
        evsel->core.attr.text_poke = 1;
 }
 
+static void perf_probe_build_id(struct evsel *evsel)
+{
+       evsel->core.attr.build_id = 1;
+}
+
 bool perf_can_sample_identifier(void)
 {
        return perf_probe_api(perf_probe_sample_identifier);
@@ -172,3 +177,8 @@ bool perf_can_aux_sample(void)
 
        return true;
 }
+
+bool perf_can_record_build_id(void)
+{
+       return perf_probe_api(perf_probe_build_id);
+}
diff --git a/tools/perf/util/perf_api_probe.h b/tools/perf/util/perf_api_probe.h
index d5506a983a94..f12ca55f509a 100644
--- a/tools/perf/util/perf_api_probe.h
+++ b/tools/perf/util/perf_api_probe.h
@@ -11,5 +11,6 @@ bool perf_can_record_cpu_wide(void);
 bool perf_can_record_switch_events(void);
 bool perf_can_record_text_poke_events(void);
 bool perf_can_sample_identifier(void);
+bool perf_can_record_build_id(void);
 
 #endif // __PERF_API_PROBE_H
diff --git a/tools/perf/util/perf_event_attr_fprintf.c 
b/tools/perf/util/perf_event_attr_fprintf.c
index e67a227c0ce7..0f1c62d40a89 100644
--- a/tools/perf/util/perf_event_attr_fprintf.c
+++ b/tools/perf/util/perf_event_attr_fprintf.c
@@ -134,6 +134,7 @@ int perf_event_attr__fprintf(FILE *fp, struct 
perf_event_attr *attr,
        PRINT_ATTRf(bpf_event, p_unsigned);
        PRINT_ATTRf(aux_output, p_unsigned);
        PRINT_ATTRf(cgroup, p_unsigned);
+       PRINT_ATTRf(build_id, p_unsigned);
 
        PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, 
p_unsigned);
        PRINT_ATTRf(bp_type, p_unsigned);
diff --git a/tools/perf/util/record.h b/tools/perf/util/record.h
index 266760ac9143..609e706f4282 100644
--- a/tools/perf/util/record.h
+++ b/tools/perf/util/record.h
@@ -49,6 +49,7 @@ struct record_opts {
        bool          no_bpf_event;
        bool          kcore;
        bool          text_poke;
+       bool          build_id;
        unsigned int  freq;
        unsigned int  mmap_pages;
        unsigned int  auxtrace_mmap_pages;

Reply via email to