Re: [Patch v1 10/10] perf tools: add new mem command for memory access profiling
On Wed, Oct 31, 2012 at 7:57 AM, Namhyung Kim wrote: > On Mon, 29 Oct 2012 16:15:52 +0100, Stephane Eranian wrote: >> This new command is a wrapper on top of perf record and >> perf report to make it easier to configure for memory >> access profiling. > > So this new command will be run only on speicific (PEBS > 2?) Intel > machines, right? Is there anything we can do for others? Or at least > it might emit a warning message.. > >> >> To record loads: >> $ perf mem -t load rec . >> >> To record stores: >> $ perf mem -t store rec . >> >> To get the report: >> $ perf mem -t load rep >> >> Signed-off-by: Stephane Eranian >> --- > [snip] >> +perf-mem(1) >> +=== >> + >> +NAME >> + >> +perf-mem - Profile memory accesses >> + >> +SYNOPSIS >> + >> +[verse] >> +'perf mem' -t load record >> +'perf mem' -t store record >> +'perf mem' -t load report >> +'perf mem' -t store report > > Is '-t' option mandatory? AFAISC it seems optional and defaults to load. > Yes, defaults to load. Fixed. > And is for record also mandatory? Doesn't 'perf record' make > it optional? > > If so, the above can be written like you did in 'mem_usage': > > 'perf mem' [] (record [] | report) > Fixed. > >> + >> +DESCRIPTION >> +--- >> +"perf mem -t record" runs a command and gathers memory operation data >> +from it, into perf.data. Perf record options are accepted and are passed >> through. >> + >> +"perf mem -t report" displays the result. It invokes perf report >> with the >> +right set of options to display a memory access profile. >> + >> +OPTIONS >> +--- >> +...:: >> + Any command you can specify in a shell. >> + >> +-t:: >> +--type=:: >> + Select the memory operation type: load or store > > It'd better saying it defaults to load. > Done. >> + >> +-R:: >> +--dump-raw-samples=:: >> + Dump the raw decoded samples on the screen in a format that is easy to >> parse with >> + one sample per line. > > Didn't we usually use -D switch for this? > Using -D to be consistent with report. >> + >> +-x:: >> +--field-separator:: >> + Specify the field separator used when dump raw samples (-R option). By >> default, >> + The separator is the space character. > > And using -t for this will make it consistent with perf report IMHO. > >> + No it's -x there too. >> +-C:: >> +--cpu-list:: >> + Restrict dump of raw samples to those provided via this option. Note >> that the same >> + option can be passed in record mode. It will be interpreted the same >> way as perf >> + record. >> + >> +SEE ALSO >> + >> +linkperf:perf-record[1], linkperf:perf-report[1] > [snip] >> +#define MEM_OPERATION_LOAD "load" >> +#define MEM_OPERATION_STORE "store" >> + >> +static char const*input_name = "perf.data"; > > We have a global 'input_name' as of commit 70cb4e963f77 ("perf tools: > Add a global variable 'const char *input_name'"). > > Yes. Fixed. >> +static const char*mem_operation = MEM_OPERATION_LOAD; >> +static const char*csv_sep= NULL; > > Why not use symbol_conf.field_sep? > Done. >> + >> +struct perf_mem { >> + struct perf_tooltool; >> + char const *input_name; >> + symbol_filter_t annotate_init; >> + boolhide_unresolved; >> + booldump_raw; >> + const char *cpu_list; >> + DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); >> +}; >> + >> +static const char * const mem_usage[] = { >> + "perf mem [] {record |report}", >> + NULL >> +}; > [snip] >> +static int report_raw_events(struct perf_mem *mem) >> +{ >> + int err = -EINVAL; >> + int ret; >> + struct perf_session *session = perf_session__new(input_name, O_RDONLY, >> + 0, false, &mem->tool); >> + >> + if (mem->cpu_list) { >> + ret = perf_session__cpu_bitmap(session, mem->cpu_list, >> +mem->cpu_bitmap); >> + if (ret) >> + goto out_delete; >> + } >> + >> + if (symbol__init() < 0) >> + return -1; >> + >> + if (session == NULL) >> + return -ENOMEM; > > This check should be moved before perf_session__cpu_bitmap() calls. > Yes. Thanks. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Patch v1 10/10] perf tools: add new mem command for memory access profiling
On Mon, 29 Oct 2012 16:15:52 +0100, Stephane Eranian wrote: > This new command is a wrapper on top of perf record and > perf report to make it easier to configure for memory > access profiling. So this new command will be run only on speicific (PEBS > 2?) Intel machines, right? Is there anything we can do for others? Or at least it might emit a warning message.. > > To record loads: > $ perf mem -t load rec . > > To record stores: > $ perf mem -t store rec . > > To get the report: > $ perf mem -t load rep > > Signed-off-by: Stephane Eranian > --- [snip] > +perf-mem(1) > +=== > + > +NAME > + > +perf-mem - Profile memory accesses > + > +SYNOPSIS > + > +[verse] > +'perf mem' -t load record > +'perf mem' -t store record > +'perf mem' -t load report > +'perf mem' -t store report Is '-t' option mandatory? AFAISC it seems optional and defaults to load. And is for record also mandatory? Doesn't 'perf record' make it optional? If so, the above can be written like you did in 'mem_usage': 'perf mem' [] (record [] | report) > + > +DESCRIPTION > +--- > +"perf mem -t record" runs a command and gathers memory operation data > +from it, into perf.data. Perf record options are accepted and are passed > through. > + > +"perf mem -t report" displays the result. It invokes perf report with > the > +right set of options to display a memory access profile. > + > +OPTIONS > +--- > +...:: > + Any command you can specify in a shell. > + > +-t:: > +--type=:: > + Select the memory operation type: load or store It'd better saying it defaults to load. > + > +-R:: > +--dump-raw-samples=:: > + Dump the raw decoded samples on the screen in a format that is easy to > parse with > + one sample per line. Didn't we usually use -D switch for this? > + > +-x:: > +--field-separator:: > + Specify the field separator used when dump raw samples (-R option). By > default, > + The separator is the space character. And using -t for this will make it consistent with perf report IMHO. > + > +-C:: > +--cpu-list:: > + Restrict dump of raw samples to those provided via this option. Note > that the same > + option can be passed in record mode. It will be interpreted the same > way as perf > + record. > + > +SEE ALSO > + > +linkperf:perf-record[1], linkperf:perf-report[1] [snip] > +#define MEM_OPERATION_LOAD "load" > +#define MEM_OPERATION_STORE "store" > + > +static char const*input_name = "perf.data"; We have a global 'input_name' as of commit 70cb4e963f77 ("perf tools: Add a global variable 'const char *input_name'"). > +static const char*mem_operation = MEM_OPERATION_LOAD; > +static const char*csv_sep= NULL; Why not use symbol_conf.field_sep? > + > +struct perf_mem { > + struct perf_tooltool; > + char const *input_name; > + symbol_filter_t annotate_init; > + boolhide_unresolved; > + booldump_raw; > + const char *cpu_list; > + DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); > +}; > + > +static const char * const mem_usage[] = { > + "perf mem [] {record |report}", > + NULL > +}; [snip] > +static int report_raw_events(struct perf_mem *mem) > +{ > + int err = -EINVAL; > + int ret; > + struct perf_session *session = perf_session__new(input_name, O_RDONLY, > + 0, false, &mem->tool); > + > + if (mem->cpu_list) { > + ret = perf_session__cpu_bitmap(session, mem->cpu_list, > +mem->cpu_bitmap); > + if (ret) > + goto out_delete; > + } > + > + if (symbol__init() < 0) > + return -1; > + > + if (session == NULL) > + return -ENOMEM; This check should be moved before perf_session__cpu_bitmap() calls. Thanks, Namhyung > + > + printf("# PID, TID, IP, ADDR, COST, DSRC, SYMBOL\n"); > + > + err = perf_session__process_events(session, &mem->tool); > + if (err) > + return err; > + > + return 0; > + > +out_delete: > + perf_session__delete(session); > + return err; > +} -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[Patch v1 10/10] perf tools: add new mem command for memory access profiling
This new command is a wrapper on top of perf record and perf report to make it easier to configure for memory access profiling. To record loads: $ perf mem -t load rec . To record stores: $ perf mem -t store rec . To get the report: $ perf mem -t load rep Signed-off-by: Stephane Eranian --- tools/perf/Documentation/perf-mem.txt | 51 +++ tools/perf/Makefile |1 + tools/perf/builtin-mem.c | 237 + tools/perf/builtin.h |1 + tools/perf/command-list.txt |1 + tools/perf/perf.c |1 + 6 files changed, 292 insertions(+) create mode 100644 tools/perf/Documentation/perf-mem.txt create mode 100644 tools/perf/builtin-mem.c diff --git a/tools/perf/Documentation/perf-mem.txt b/tools/perf/Documentation/perf-mem.txt new file mode 100644 index 000..57ead4f --- /dev/null +++ b/tools/perf/Documentation/perf-mem.txt @@ -0,0 +1,51 @@ +perf-mem(1) +=== + +NAME + +perf-mem - Profile memory accesses + +SYNOPSIS + +[verse] +'perf mem' -t load record +'perf mem' -t store record +'perf mem' -t load report +'perf mem' -t store report + +DESCRIPTION +--- +"perf mem -t record" runs a command and gathers memory operation data +from it, into perf.data. Perf record options are accepted and are passed through. + +"perf mem -t report" displays the result. It invokes perf report with the +right set of options to display a memory access profile. + +OPTIONS +--- +...:: + Any command you can specify in a shell. + +-t:: +--type=:: + Select the memory operation type: load or store + +-R:: +--dump-raw-samples=:: + Dump the raw decoded samples on the screen in a format that is easy to parse with + one sample per line. + +-x:: +--field-separator:: + Specify the field separator used when dump raw samples (-R option). By default, + The separator is the space character. + +-C:: +--cpu-list:: + Restrict dump of raw samples to those provided via this option. Note that the same + option can be passed in record mode. It will be interpreted the same way as perf + record. + +SEE ALSO + +linkperf:perf-record[1], linkperf:perf-report[1] diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 629fc6a..643b316 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -459,6 +459,7 @@ BUILTIN_OBJS += $(OUTPUT)builtin-lock.o BUILTIN_OBJS += $(OUTPUT)builtin-kvm.o BUILTIN_OBJS += $(OUTPUT)builtin-test.o BUILTIN_OBJS += $(OUTPUT)builtin-inject.o +BUILTIN_OBJS += $(OUTPUT)builtin-mem.o PERFLIBS = $(LIB_FILE) $(LIBTRACEEVENT) diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c new file mode 100644 index 000..5334f7d --- /dev/null +++ b/tools/perf/builtin-mem.c @@ -0,0 +1,237 @@ +#include "builtin.h" +#include "perf.h" + +#include "util/parse-options.h" +#include "util/trace-event.h" +#include "util/tool.h" +#include "util/session.h" + +#define MEM_OPERATION_LOAD "load" +#define MEM_OPERATION_STORE"store" + +static char const *input_name = "perf.data"; +static const char *mem_operation = MEM_OPERATION_LOAD; +static const char *csv_sep= NULL; + +struct perf_mem { + struct perf_tooltool; + char const *input_name; + symbol_filter_t annotate_init; + boolhide_unresolved; + booldump_raw; + const char *cpu_list; + DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); +}; + +static const char * const mem_usage[] = { + "perf mem [] {record |report}", + NULL +}; + +static int __cmd_record(int argc, const char **argv) +{ + int rec_argc, i = 0, j; + const char **rec_argv; + char event[64]; + int ret; + + rec_argc = argc + 4; + rec_argv = calloc(rec_argc + 1, sizeof(char *)); + if (!rec_argv) + return -1; + + rec_argv[i++] = strdup("record"); + if (!strcmp(mem_operation, MEM_OPERATION_LOAD)) + rec_argv[i++] = strdup("-l"); + rec_argv[i++] = strdup("-d"); + rec_argv[i++] = strdup("-e"); + + if (strcmp(mem_operation, MEM_OPERATION_LOAD)) + sprintf(event, "cpu/mem-stores/pp"); + else + sprintf(event, "cpu/mem-loads/pp"); + + rec_argv[i++] = strdup(event); + for (j = 1; j < argc; j++, i++) + rec_argv[i] = argv[j]; + + ret = cmd_record(i, rec_argv, NULL); + free(rec_argv); + return ret; +} + +static int +dump_raw_samples(struct perf_tool *tool, +union perf_event *event, +struct perf_sample *sample, +struct perf_evsel *evsel __maybe_unused, +struct machine *machine) +{ + struct perf_mem *mem = container_of(tool, struct perf_mem, tool); + struct addr_location al; +