Em Wed, Jan 18, 2017 at 05:41:49PM -0800, Andi Kleen escreveu:
> From: Andi Kleen <a...@linux.intel.com>
> 
> When dumping PT traces with perf script it is very useful to see the
> assembler for each sample, so that it is easily possible to follow
> the control flow.
> 
> As using objdump is difficult and inefficient from perf script this
> patch uses the Intel xed library to implement assembler output.
> The library can be downloaded from http://github.com/intelxed/xed

What I have, including the multiple changes needed to make this build, is
available in the tmp.perf/xed branch of my tree at:

git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux.git

https://git.kernel.org/cgit/linux/kernel/git/acme/linux.git/log/?h=tmp.perf/xed

So, while testing this I noticed some differences between what objdump -d
produces and what is produced with this patch; see below. Where the two differ
I added the instruction bytes from the objdump output. I still have to look up
the opcode tables manually to check each case, but some of the differences seem
harmless.

Do you know of any tool that compares the output of objdump -d with what is
produced by a similar xed-based tool?

Disassembly of perf_evsel__enable() bits referenced in the samples
collected for this test:

  'perf script' with this patch:                     DIFF   objdump -d

  4b8506  jz 0x4b84d0 <perf_evsel__enable+0x70>      74 c8                 je   4b84d0 <perf_evsel__enable+0x70>
  4b84d0  add $0x1, %r14                                                   add  $0x1,%r14
  4b84d4  cmp %r14d, %ebx                                                  cmp  %r14d,%ebx
  4b84d7  jle 0x4b8530 <perf_evsel__enable+0xd0>                           jle  4b8530 <perf_evsel__enable+0xd0>
  4b8530  add $0x1, %r12                                                   add  $0x1,%r12
  4b8534  cmp %r12d, %r13d                                                 cmp  %r12d,%r13d
  4b8537  jnle 0x4b84c2 <perf_evsel__enable+0x62>    7f 89                 jg   4b84c2 <perf_evsel__enable+0x62>
  4b84c2  xor %r14d, %r14d                                                 xor  %r14d,%r14d
  4b84c5  test %ebx, %ebx                                                  test %ebx,%ebx
  4b84c7  jnle 0x4b84d9 <perf_evsel__enable+0x79>    7f 10                 jg   4b84d9 <perf_evsel__enable+0x79>
  4b84d9  movq  0x90(%r15), %rax                     49 8b 87 90 00 00 00  mov  0x90(%r15),%rax
  4b84e0  mov %r12, %rdx                                                   mov  %r12,%rdx
  4b84e3  mov %r14, %rcx                                                   mov  %r14,%rcx
  4b84e6  mov $0x2400, %esi                                                mov  $0x2400,%esi
  4b84eb  imulq  (%rax), %rdx                        48 0f af 10           imul (%rax),%rdx
  4b84ef  imulq  0x8(%rax), %rcx                     48 0f af 48 08        imul 0x8(%rax),%rcx
  4b84f4  add %rdx, %rax                                                   add  %rdx,%rax
  4b84f7  xor %edx, %edx                                                   xor  %edx,%edx
  4b84f9  movl  0x18(%rcx,%rax,1), %edi              8b 7c 01 18           mov  0x18(%rcx,%rax,1),%edi
  4b84fd  xor %eax, %eax                                                   xor  %eax,%eax
  4b84ff  callq  0x42d990 <ioctl@plt>                                      callq  42d990 <ioctl@plt>
  4b8504  test %eax, %eax                                                  test %eax,%eax
  4b8506  jz 0x4b84d0 <perf_evsel__enable+0x70>      74 c8                 je   4b84d0 <perf_evsel__enable+0x70>

Built today using xed from:

https://github.com/intelxed/xed

[acme@jouet xed]$ git log --oneline -5
4507b57ba629 rebase tests to account for new operand sorting
c7c1777216f5 generator: xed operand ordering sort was wrong
eb45a282de28 convert LOOPNE/E comments to XED COMMENT field
22427e1a4027 generator.py: duplicate iform check now checks isa-set conflicts
4a0a09a1542e Disambiguate iforms for VFPCLASS{PD,PS} mem forms, suffix with VL
[acme@jouet xed]$
 
> The previous version of this patch used udis86, but was
> rejected because udis86 was unmaintained and a runtime dependency.
> Using the recently released xed avoids both of these problems:
> - XED is well maintained and used by many Intel tools
> - XED is linked statically so there is no runtime dependency.
> 
> The library is probed as an external dependency in the usual way. Then perf
> script calls into it when needed, and handles callbacks to resolve
> symbols.
> 
> % perf record -e intel_pt//u true
> % perf script -F sym,symoff,ip,asm --itrace=i0ns | head
>      7fc7188b4190 _start+0x0  mov %rsp, %rdi
>      7fc7188b4193 _start+0x3  call _dl_start
>      7fc7188b7710 _dl_start+0x0       push %rbp
>      7fc7188b7711 _dl_start+0x1       mov %rsp, %rbp
>      7fc7188b7714 _dl_start+0x4       push %r15
>      7fc7188b7716 _dl_start+0x6       push %r14
>      7fc7188b7718 _dl_start+0x8       push %r13
>      7fc7188b771a _dl_start+0xa       push %r12
>      7fc7188b771c _dl_start+0xc       mov %rdi, %r12
>      7fc7188b771f _dl_start+0xf       push %rbx
> 
> v2:
> Converted to use XED instead of udis86.
> Separate disassembler interface into separate arch specific file.
> Lots of cleanups and improvements.
> 
> Cc: adrian.hun...@intel.com
> Signed-off-by: Andi Kleen <a...@linux.intel.com>
> ---
>  tools/perf/Documentation/perf-script.txt |  4 +-
>  tools/perf/builtin-script.c              | 72 +++++++++++++++++++++++++++-----
>  2 files changed, 64 insertions(+), 12 deletions(-)
> 
> diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
> index 4ed5f239ba7d..497989ea9768 100644
> --- a/tools/perf/Documentation/perf-script.txt
> +++ b/tools/perf/Documentation/perf-script.txt
> @@ -116,7 +116,7 @@ OPTIONS
>  --fields::
>          Comma separated list of fields to print. Options are:
>          comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff,
> -        srcline, period, iregs, brstack, brstacksym, flags, bpf-output,
> +        srcline, period, iregs, brstack, brstacksym, flags, bpf-output, asm,
>          callindent, insn, insnlen. Field list can be prepended with the type, trace, sw or hw,
>          to indicate to which event type the field list applies.
>          e.g., -F sw:comm,tid,time,ip,sym  and -F trace:time,cpu,trace
> @@ -198,6 +198,8 @@ OPTIONS
>  
>       The brstacksym is identical to brstack, except that the FROM and TO addresses are printed in a symbolic form if possible.
>  
> +     When asm is specified the assembler instruction of each sample is printed in disassembled form.
> +
>  -k::
>  --vmlinux=<file>::
>          vmlinux pathname
> diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
> index c0783b4f7b6c..7a09c4f7df3f 100644
> --- a/tools/perf/builtin-script.c
> +++ b/tools/perf/builtin-script.c
> @@ -28,6 +28,7 @@
>  #include <linux/time64.h>
>  #include "asm/bug.h"
>  #include "util/mem-events.h"
> +#include "util/dis.h"
>  
>  static char const            *script_name;
>  static char const            *generate_script_lang;
> @@ -69,6 +70,7 @@ enum perf_output_field {
>       PERF_OUTPUT_CALLINDENT      = 1U << 20,
>       PERF_OUTPUT_INSN            = 1U << 21,
>       PERF_OUTPUT_INSNLEN         = 1U << 22,
> +     PERF_OUTPUT_ASM             = 1U << 23,
>  };
>  
>  struct output_option {
> @@ -98,6 +100,7 @@ struct output_option {
>       {.str = "callindent", .field = PERF_OUTPUT_CALLINDENT},
>       {.str = "insn", .field = PERF_OUTPUT_INSN},
>       {.str = "insnlen", .field = PERF_OUTPUT_INSNLEN},
> +     {.str = "asm", .field = PERF_OUTPUT_ASM},
>  };
>  
>  /* default set to maintain compatibility with current format */
> @@ -292,7 +295,11 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
>                      "selected. Hence, no address to lookup the source line number.\n");
>               return -EINVAL;
>       }
> -
> +     if (PRINT_FIELD(ASM) && !PRINT_FIELD(IP)) {
> +             pr_err("Display of assembler requested but sample IP is not\n"
> +                    "selected.\n");
> +             return -EINVAL;
> +     }
>       if ((PRINT_FIELD(PID) || PRINT_FIELD(TID)) &&
>               perf_evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID",
>                                       PERF_OUTPUT_TID|PERF_OUTPUT_PID))
> @@ -436,6 +443,39 @@ static void print_sample_iregs(struct perf_sample *sample,
>       }
>  }
>  
> +static void print_sample_asm(union perf_event *event,
> +                          struct perf_sample *sample,
> +                          struct thread *thread,
> +                          struct addr_location *al,
> +                          struct machine *machine)
> +{
> +     struct perf_dis x;
> +     u8 buffer[32];
> +     int len;
> +     u64 offset;
> +
> +     x.thread = thread;
> +     x.cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
> +     x.cpu = sample->cpu;
> +
> +     if (!al->map || !al->map->dso)
> +             return;
> +     if (al->map->dso->data.status == DSO_DATA_STATUS_ERROR)
> +             return;
> +
> +     /* Load maps to ensure dso->is_64_bit has been updated */
> +     map__load(al->map);
> +     x.is64bit = al->map->dso->is_64_bit;
> +
> +     offset = al->map->map_ip(al->map, sample->ip);
> +     len = dso__data_read_offset(al->map->dso, machine,
> +                                 offset, buffer, MAXINSN);
> +     if (len <= 0)
> +             return;
> +
> +     printf("\t%s", disas_inst(&x, sample->ip, buffer, len, NULL));
> +}
> +
>  static void print_sample_start(struct perf_sample *sample,
>                              struct thread *thread,
>                              struct perf_evsel *evsel)
> @@ -631,8 +671,12 @@ static void print_sample_callindent(struct perf_sample *sample,
>               printf("%*s", spacing - len, "");
>  }
>  
> -static void print_insn(struct perf_sample *sample,
> -                    struct perf_event_attr *attr)
> +static void print_insn(union perf_event *event,
> +                    struct perf_sample *sample,
> +                    struct perf_event_attr *attr,
> +                    struct thread *thread,
> +                    struct addr_location *al,
> +                    struct machine *machine)
>  {
>       if (PRINT_FIELD(INSNLEN))
>               printf(" ilen: %d", sample->insn_len);
> @@ -643,12 +687,16 @@ static void print_insn(struct perf_sample *sample,
>               for (i = 0; i < sample->insn_len; i++)
>                       printf(" %02x", (unsigned char)sample->insn[i]);
>       }
> +     if (PRINT_FIELD(ASM))
> +             print_sample_asm(event, sample, thread, al, machine);
>  }
>  
> -static void print_sample_bts(struct perf_sample *sample,
> +static void print_sample_bts(union perf_event *event,
> +                          struct perf_sample *sample,
>                            struct perf_evsel *evsel,
>                            struct thread *thread,
> -                          struct addr_location *al)
> +                          struct addr_location *al,
> +                          struct machine *machine)
>  {
>       struct perf_event_attr *attr = &evsel->attr;
>       bool print_srcline_last = false;
> @@ -689,7 +737,7 @@ static void print_sample_bts(struct perf_sample *sample,
>       if (print_srcline_last)
>               map__fprintf_srcline(al->map, al->addr, "\n  ", stdout);
>  
> -     print_insn(sample, attr);
> +     print_insn(event, sample, attr, thread, al, machine);
>  
>       printf("\n");
>  }
> @@ -871,7 +919,9 @@ static size_t data_src__printf(u64 data_src)
>  
>  static void process_event(struct perf_script *script,
>                         struct perf_sample *sample, struct perf_evsel *evsel,
> -                       struct addr_location *al)
> +                       struct addr_location *al,
> +                       struct machine *machine,
> +                       union perf_event *event)
>  {
>       struct thread *thread = al->thread;
>       struct perf_event_attr *attr = &evsel->attr;
> @@ -898,7 +948,7 @@ static void process_event(struct perf_script *script,
>               print_sample_flags(sample->flags);
>  
>       if (is_bts_event(attr)) {
> -             print_sample_bts(sample, evsel, thread, al);
> +             print_sample_bts(event, sample, evsel, thread, al, machine);
>               return;
>       }
>  
> @@ -936,7 +986,7 @@ static void process_event(struct perf_script *script,
>  
>       if (perf_evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT))
>               print_sample_bpf_output(sample);
> -     print_insn(sample, attr);
> +     print_insn(event, sample, attr, thread, al, machine);
>       printf("\n");
>  }
>  
> @@ -1046,7 +1096,7 @@ static int process_sample_event(struct perf_tool *tool,
>       if (scripting_ops)
>               scripting_ops->process_event(event, sample, evsel, &al);
>       else
> -             process_event(scr, sample, evsel, &al);
> +             process_event(scr, sample, evsel, &al, machine, event);
>  
>  out_put:
>       addr_location__put(&al);
> @@ -2152,7 +2202,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
>                    "Valid types: hw,sw,trace,raw. "
>                    "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
>                    "addr,symoff,period,iregs,brstack,brstacksym,flags,"
> -                  "bpf-output,callindent,insn,insnlen", parse_output_fields),
> +                  "bpf-output,callindent,insn,insnlen,asm", parse_output_fields),
>       OPT_BOOLEAN('a', "all-cpus", &system_wide,
>                   "system-wide collection from all CPUs"),
>       OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
> -- 
> 2.9.3

Reply via email to