On Wed, Feb 18, 2015 at 10:40:26PM +0100, Stephane Eranian wrote:
> This patch adds a -j jitdump option to perf inject.
> 
> This option injects MMAP records into the perf.data
> file to cover the jitted code mmaps. It also emits
> ELF images for each function in the jitdump file.
> Those images are created where the jitdump file is.
> The MMAP records point to that location as well.
> 
> Typical flow:
> $ java -agentpath:libpjvmti.so java_class
> $ perf inject -j ~/.debug/jit/java-jit-20140514.XXAb0e5C/jit-7640.dump \
>               -i perf.data \
>               -o perf.data.jitted
> 
> $ perf report -i perf.data.jitted
> 
> Note that jitdump.h support is not limited to Java, it works with
> any jitted environment modified to emit the jitdump file format,
> including those where code can be jitted multiple times and moved
> around.
> 
> The jitdump.h format is adapted from the Oprofile project.
> 
> Signed-off-by: Stephane Eranian <eran...@google.com>
> ---
>  tools/perf/Documentation/perf-inject.txt |  11 +
>  tools/perf/Makefile.perf                 |   6 +-
>  tools/perf/builtin-inject.c              | 205 ++++++++++++++
>  tools/perf/util/genelf.c                 | 463 +++++++++++++++++++++++++++++++
>  tools/perf/util/genelf.h                 |   6 +
>  tools/perf/util/jit.h                    |  27 ++
>  tools/perf/util/jitdump.c                | 233 ++++++++++++++++
>  tools/perf/util/jitdump.h                |  92 ++++++
>  8 files changed, 1042 insertions(+), 1 deletion(-)
>  create mode 100644 tools/perf/util/genelf.c
>  create mode 100644 tools/perf/util/genelf.h
>  create mode 100644 tools/perf/util/jit.h
>  create mode 100644 tools/perf/util/jitdump.c
>  create mode 100644 tools/perf/util/jitdump.h
> 
> diff --git a/tools/perf/Documentation/perf-inject.txt 
> b/tools/perf/Documentation/perf-inject.txt
> index dc7442c..237f195 100644
> --- a/tools/perf/Documentation/perf-inject.txt
> +++ b/tools/perf/Documentation/perf-inject.txt
> @@ -40,6 +40,17 @@ OPTIONS
>       Merge sched_stat and sched_switch for getting events where and how long
>       tasks slept. sched_switch contains a callchain where a task slept and
>       sched_stat contains a timeslice how long a task slept.
> +-j::
> +--jit::
> +     Merge a jitdump file into the perf.data file by adding mmap records to
> +     cover jitted code and emit ELF images for each jitted function. The ELF
> +     images are saved in the same directory as the jitdump. Use -E to suppress
> +     ELF image generation.
> +-E::
> +--jit-disable-elf::
> +     When used with -j, it prevents creating the ELF images for each jitted
> +     function. Only the jitted code mmap records are injected into the perf.data
> +     file. This option has no effect when -j is not used.
>  
>  --kallsyms=<file>::
>       kallsyms pathname
> diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
> index 53a44be..d405632 100644
> --- a/tools/perf/Makefile.perf
> +++ b/tools/perf/Makefile.perf
> @@ -324,6 +324,7 @@ LIB_H += util/perf_regs.h
>  LIB_H += util/unwind.h
>  LIB_H += util/vdso.h
>  LIB_H += util/tsc.h
> +LIB_H += util/jitdump.h
>  LIB_H += ui/helpline.h
>  LIB_H += ui/progress.h
>  LIB_H += ui/util.h
> @@ -412,6 +413,8 @@ LIB_OBJS += $(OUTPUT)util/tsc.o
>  LIB_OBJS += $(OUTPUT)util/cloexec.o
>  LIB_OBJS += $(OUTPUT)util/thread-stack.o
>  LIB_OBJS += $(OUTPUT)util/demangle-java.o
> +LIB_OBJS += $(OUTPUT)util/jitdump.o
> +LIB_OBJS += $(OUTPUT)util/genelf.o
>  
>  LIB_OBJS += $(OUTPUT)ui/setup.o
>  LIB_OBJS += $(OUTPUT)ui/helpline.o
> @@ -498,7 +501,8 @@ BUILTIN_OBJS += $(OUTPUT)builtin-inject.o
>  BUILTIN_OBJS += $(OUTPUT)tests/builtin-test.o
>  BUILTIN_OBJS += $(OUTPUT)builtin-mem.o
>  
> -PERFLIBS = $(LIB_FILE) $(LIBAPIKFS) $(LIBTRACEEVENT)
> +PERFLIBS = $(LIB_FILE) $(LIBAPIKFS) $(LIBTRACEEVENT) -lcrypto
> +

This should be guarded by a feature test.


>  
>  # We choose to avoid "if .. else if .. else .. endif endif"
>  # because maintaining the nesting to match is a pain.  If

[SNIP]
> +static int jit_repipe_code_load(struct perf_inject *inject, union jr_entry 
> *jr)
> +{
> +     struct perf_sample sample;
> +     union perf_event *event;
> +     unsigned long code, addr;
> +     size_t size;
> +     const char *sym;
> +     uint32_t count;
> +     int ret, csize;
> +     pid_t pid;
> +     struct {
> +             u32 pid, tid;
> +             u64 time;
> +     } *id;
> +
> +     pid   = jr->load.pid;
> +     csize = jr->load.code_size;
> +     addr  = jr->load.code_addr;
> +     sym   = (void *)((unsigned long)jr + sizeof(jr->load));
> +     code  = (unsigned long)jr + jr->load.p.total_size - csize;
> +     count = jr->load.code_index;
> +
> +     /*
> +      * +16 to account for sample_id_all (hack)
> +      */

Why not use perf_evlist__id_hdr_size() ?


> +     event = malloc(sizeof(*event) + 16);
> +     if (!event)
> +             return -1;
> +
> +     memset(event, 0, sizeof(*event));
> +
> +     size = snprintf(event->mmap.filename, PATH_MAX, "%s/jitted-%d-%u",
> +              inject->jit_dir,
> +              pid,
> +              count) + 1;
> +     size = PERF_ALIGN(size, sizeof(u64));
> +     if (!inject->jit_disable_elf) {
> +             ret = jit_emit_elf(event->mmap.filename, sym, code, csize);
> +             if (ret) {
> +                     free(event);
> +                     return -1;
> +             }
> +     }
> +
> +     event->mmap.header.type = PERF_RECORD_MMAP;
> +     event->mmap.header.misc = PERF_RECORD_MISC_USER;
> +     //event->mmap.header.size = sizeof(event->mmap) + 16;
> +     event->mmap.header.size = (sizeof(event->mmap) -
> +                     (sizeof(event->mmap.filename) - size) + 16); 
> //machine->id_hdr_size);
> +     event->mmap.pgoff = 0;
> +     event->mmap.start = addr;
> +     event->mmap.len   = csize;
> +     event->mmap.pid   = pid;
> +     event->mmap.tid   = jr->load.tid;
> +
> +     id = (void *)((unsigned long)event + event->mmap.header.size - 16);
> +     id->pid  = pid;
> +     id->tid  = jr->load.tid;
> +     id->time = jr->load.p.timestamp;
> +
> +     memset(&sample, 0, sizeof(sample));
> +     sample.time = id->time;
> +
> +     return perf_event__repipe_synth(&inject->tool, event);
> +}

[SNIP]
> diff --git a/tools/perf/util/genelf.c b/tools/perf/util/genelf.c
> new file mode 100644
> index 0000000..a6f9e43
> --- /dev/null
> +++ b/tools/perf/util/genelf.c
> @@ -0,0 +1,463 @@
> +/*
> + * genelf.c
> + * Copyright (C) 2014, Google, Inc
> + *
> + * Contributed by:
> + *   Stephane Eranian <eran...@gmail.com>
> + *
> + * Released under the GPL v2. (and only v2, not any later version)
> + */
> +
> +#include <sys/types.h>
> +#include <stdio.h>
> +#include <getopt.h>
> +#include <stddef.h>
> +#include <libelf.h>
> +#include <string.h>
> +#include <stdlib.h>
> +#include <fcntl.h>
> +#include <err.h>
> +
> +#include "perf.h"
> +#include "genelf.h"
> +
> +#define JVMTI
> +#define BUILD_ID_MD5
> +#undef BUILD_ID_SHA  /* does not seem to work well when linked with Java */
> +#undef  BUILD_ID_URANDOM     /* different uuid for each run */
> +
> +#ifdef BUILD_ID_SHA
> +#include <openssl/sha.h>
> +#endif
> +
> +#ifdef BUILD_ID_MD5
> +#include <openssl/md5.h>
> +#endif
> +
> +#if   defined(__arm__)
> +#define GEN_ELF_ARCH EM_ARM
> +#define GEN_ELF_ENDIAN       ELFDATA2LSB
> +#define GEN_ELF_CLASS        ELFCLASS32
> +#elif defined(__x86_64__)
> +#define GEN_ELF_ARCH EM_X86_64
> +#define GEN_ELF_ENDIAN       ELFDATA2LSB
> +#define GEN_ELF_CLASS        ELFCLASS64
> +#elif defined(__i386__)
> +#define GEN_ELF_ARCH EM_386
> +#define GEN_ELF_ENDIAN       ELFDATA2LSB
> +#define GEN_ELF_CLASS        ELFCLASS32
> +#elif defined(__ppcle__)
> +#define GEN_ELF_ARCH EM_PPC
> +#define GEN_ELF_ENDIAN       ELFDATA2LSB
> +#define GEN_ELF_CLASS        ELFCLASS64
> +#elif defined(__powerpc__)
> +#define GEN_ELF_ARCH EM_PPC64
> +#define GEN_ELF_ENDIAN       ELFDATA2MSB
> +#define GEN_ELF_CLASS        ELFCLASS64
> +#elif defined(__powerpcle__)
> +#define GEN_ELF_ARCH EM_PPC64
> +#define GEN_ELF_ENDIAN       ELFDATA2LSB
> +#define GEN_ELF_CLASS        ELFCLASS64
> +#else
> +#error "unsupported architecture"
> +#endif

This means other architectures such as s390 or sparc can no longer build perf, right?

Thanks,
Namhyung


> +
> +#if GEN_ELF_CLASS == ELFCLASS64
> +#define elf_newehdr  elf64_newehdr
> +#define elf_getshdr  elf64_getshdr
> +#define Elf_Ehdr     Elf64_Ehdr
> +#define Elf_Shdr     Elf64_Shdr
> +#define Elf_Sym              Elf64_Sym
> +#define ELF_ST_TYPE(a)       ELF64_ST_TYPE(a)
> +#define ELF_ST_BIND(a)       ELF64_ST_BIND(a)
> +#define ELF_ST_VIS(a)        ELF64_ST_VISIBILITY(a)
> +#else
> +#define elf_newehdr  elf32_newehdr
> +#define elf_getshdr  elf32_getshdr
> +#define Elf_Ehdr     Elf32_Ehdr
> +#define Elf_Shdr     Elf32_Shdr
> +#define Elf_Sym              Elf32_Sym
> +#define ELF_ST_TYPE(a)       ELF32_ST_TYPE(a)
> +#define ELF_ST_BIND(a)       ELF32_ST_BIND(a)
> +#define ELF_ST_VIS(a)        ELF32_ST_VISIBILITY(a)
> +#endif
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to