[tip:perf/core] perf intel-pt: Add brief documentation for PEBS via Intel PT
Commit-ID: 243384dd25c8ea721c5c82a229eaf33cbd1bfd52 Gitweb: https://git.kernel.org/tip/243384dd25c8ea721c5c82a229eaf33cbd1bfd52 Author: Adrian Hunter AuthorDate: Tue, 6 Aug 2019 11:46:06 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Wed, 14 Aug 2019 10:59:59 -0300 perf intel-pt: Add brief documentation for PEBS via Intel PT Document how to select PEBS via Intel PT and how to display synthesized PEBS samples. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20190806084606.4021-8-alexander.shish...@linux.intel.com Signed-off-by: Alexander Shishkin [ Update the example to use a group with intel_pt// as the group leader, as per Alex comment ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/intel-pt.txt | 15 +++ 1 file changed, 15 insertions(+) diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt index 50c5b60101bd..e0d9e7dd4f17 100644 --- a/tools/perf/Documentation/intel-pt.txt +++ b/tools/perf/Documentation/intel-pt.txt @@ -919,3 +919,18 @@ amended to take the number of elements as a parameter. Note there is currently no advantage to using Intel PT instead of LBR, but that may change in the future if greater use is made of the data. + + +PEBS via Intel PT +================= + +Some hardware has the feature to redirect PEBS records to the Intel PT trace. +Recording is selected by using the aux-output config term e.g. + + perf record -c 1 -e '{intel_pt/branch=0/,cycles/aux-output/ppp}' uname + +Note that currently, software only supports redirecting at most one PEBS event. + +To display PEBS events from the Intel PT trace, use the itrace 'o' option e.g. + + perf script --itrace=oe
[tip:perf/core] perf tools: Add aux-output config term
Commit-ID: 1b9921546a9641aefc4a52c1c635b96b67142993 Gitweb: https://git.kernel.org/tip/1b9921546a9641aefc4a52c1c635b96b67142993 Author: Adrian Hunter AuthorDate: Tue, 6 Aug 2019 11:46:05 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Wed, 14 Aug 2019 10:59:59 -0300 perf tools: Add aux-output config term Expose the aux_output attribute flag to the user to configure, by adding a config term 'aux-output'. For events that support it, selection of 'aux-output' causes the generation of AUX records instead of event records. This requires that an AUX area event is also provided. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20190806084606.4021-7-alexander.shish...@linux.intel.com Signed-off-by: Alexander Shishkin Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-record.txt | 2 ++ tools/perf/util/evsel.c | 3 +++ tools/perf/util/evsel.h | 2 ++ tools/perf/util/parse-events.c | 8 tools/perf/util/parse-events.h | 1 + tools/perf/util/parse-events.l | 1 + 6 files changed, 17 insertions(+) diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index d5e58e0a2bca..c6f9f31b6039 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -60,6 +60,8 @@ OPTIONS - 'name' : User defined event name. Single quotes (') may be used to escape symbols in the name from parsing by shell and tool like this: name=\'CPU_CLK_UNHALTED.THREAD:cmask=0x1\'. + - 'aux-output': Generate AUX records instead of events. This requires + that an AUX area event is also provided. See the linkperf:perf-list[1] man page for more parameters. 
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 897a97af2d81..5da40511546b 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -833,6 +833,9 @@ static void apply_config_terms(struct evsel *evsel, break; case PERF_EVSEL__CONFIG_TERM_PERCORE: break; + case PERF_EVSEL__CONFIG_TERM_AUX_OUTPUT: + attr->aux_output = term->val.aux_output ? 1 : 0; + break; default: break; } diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 3cf35aa782b9..8a316dd54cd0 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -52,6 +52,7 @@ enum term_type { PERF_EVSEL__CONFIG_TERM_DRV_CFG, PERF_EVSEL__CONFIG_TERM_BRANCH, PERF_EVSEL__CONFIG_TERM_PERCORE, + PERF_EVSEL__CONFIG_TERM_AUX_OUTPUT, }; struct perf_evsel_config_term { @@ -70,6 +71,7 @@ struct perf_evsel_config_term { char*branch; unsigned long max_events; boolpercore; + boolaux_output; } val; bool weak; }; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 2cfec3b7a982..9101568946d2 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -963,6 +963,7 @@ static const char *config_term_names[__PARSE_EVENTS__TERM_TYPE_NR] = { [PARSE_EVENTS__TERM_TYPE_NOOVERWRITE] = "no-overwrite", [PARSE_EVENTS__TERM_TYPE_DRV_CFG] = "driver-config", [PARSE_EVENTS__TERM_TYPE_PERCORE] = "percore", + [PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT]= "aux-output", }; static bool config_term_shrinked; @@ -1083,6 +1084,9 @@ do { \ return -EINVAL; } break; + case PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT: + CHECK_TYPE_VAL(NUM); + break; default: err->str = strdup("unknown term"); err->idx = term->err_term; @@ -1133,6 +1137,7 @@ static int config_term_tracepoint(struct perf_event_attr *attr, case PARSE_EVENTS__TERM_TYPE_MAX_EVENTS: case PARSE_EVENTS__TERM_TYPE_OVERWRITE: case PARSE_EVENTS__TERM_TYPE_NOOVERWRITE: + case PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT: return config_term_common(attr, term, err); default: if (err) { @@ -1225,6 +1230,9 @@ do { \ 
ADD_CONFIG_TERM(PERCORE, percore, term->val.num ? true : false); break; + case PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT: + ADD_CONFIG_TERM(AUX_OUTPUT, aux_output, term->val.num ? 1 : 0); + break; default:
[tip:perf/core] perf tools: Add itrace option 'o' to synthesize aux-output events
Commit-ID: 181ebb5e23a5e480f6d6aa2816a9c4aaa65afa59 Gitweb: https://git.kernel.org/tip/181ebb5e23a5e480f6d6aa2816a9c4aaa65afa59 Author: Adrian Hunter AuthorDate: Tue, 6 Aug 2019 11:46:03 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Wed, 14 Aug 2019 10:59:59 -0300 perf tools: Add itrace option 'o' to synthesize aux-output events Add itrace option 'o' to synthesize events recorded in the AUX area due to the use of perf record's aux-output config term. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20190806084606.4021-5-alexander.shish...@linux.intel.com Signed-off-by: Alexander Shishkin Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/itrace.txt | 2 ++ tools/perf/util/auxtrace.c | 4 tools/perf/util/auxtrace.h | 3 +++ 3 files changed, 9 insertions(+) diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt index c2182cbabde3..82ff7dad40c2 100644 --- a/tools/perf/Documentation/itrace.txt +++ b/tools/perf/Documentation/itrace.txt @@ -5,6 +5,8 @@ x synthesize transactions events w synthesize ptwrite events p synthesize power events + o synthesize other events recorded due to the use + of aux-output (refer to perf record) e synthesize error events d create a debug log g synthesize a call chain (use with i or x) diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 72ce4c5e7c78..60428576426e 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -974,6 +974,7 @@ void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts, synth_opts->transactions = true; synth_opts->ptwrites = true; synth_opts->pwr_events = true; + synth_opts->other_events = true; synth_opts->errors = true; if (no_sample) { synth_opts->period_type = PERF_ITRACE_PERIOD_INSTRUCTIONS; @@ -1071,6 +1072,9 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str, case 'p': synth_opts->pwr_events = true; 
break; + case 'o': + synth_opts->other_events = true; + break; case 'e': synth_opts->errors = true; break; diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index 8ccabacd0b11..8e637ac3918e 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -60,6 +60,8 @@ enum itrace_period_type { * @transactions: whether to synthesize events for transactions * @ptwrites: whether to synthesize events for ptwrites * @pwr_events: whether to synthesize power events + * @other_events: whether to synthesize other events recorded due to the use of + *aux_output * @errors: whether to synthesize decoder error events * @dont_decode: whether to skip decoding entirely * @log: write a decoding log @@ -86,6 +88,7 @@ struct itrace_synth_opts { booltransactions; boolptwrites; boolpwr_events; + boolother_events; boolerrors; booldont_decode; boollog;
[tip:perf/core] perf intel-pt: Process options for PEBS event synthesis
Commit-ID: 9e64cefe4335b0f2799956d3f3cca8bb652d950f Gitweb: https://git.kernel.org/tip/9e64cefe4335b0f2799956d3f3cca8bb652d950f Author: Adrian Hunter AuthorDate: Tue, 6 Aug 2019 11:46:04 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Wed, 14 Aug 2019 10:59:59 -0300 perf intel-pt: Process options for PEBS event synthesis Process synth_opts.other_events and attr.aux_output to set up for synthesizing PEBS via Intel PT events. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20190806084606.4021-6-alexander.shish...@linux.intel.com Signed-off-by: Alexander Shishkin [ Fixed up libperf clashes, i.e. some places using perf_evsel (now in libperf) need to use instead 'evsel' (a tools/perf only abstraction) ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/util/intel-pt.c | 23 +++ tools/perf/util/intel-pt.c | 18 ++ 2 files changed, 41 insertions(+) diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index 218a4e694618..a8e633aa278a 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -548,6 +548,26 @@ static int intel_pt_validate_config(struct perf_pmu *intel_pt_pmu, evsel->core.attr.config); } +/* + * Currently, there is not enough information to disambiguate different PEBS + * events, so only allow one. 
+ */ +static bool intel_pt_too_many_aux_output(struct evlist *evlist) +{ + struct evsel *evsel; + int aux_output_cnt = 0; + + evlist__for_each_entry(evlist, evsel) + aux_output_cnt += !!evsel->core.attr.aux_output; + + if (aux_output_cnt > 1) { + pr_err(INTEL_PT_PMU_NAME " supports at most one event with aux-output\n"); + return true; + } + + return false; +} + static int intel_pt_recording_options(struct auxtrace_record *itr, struct evlist *evlist, struct record_opts *opts) @@ -588,6 +608,9 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, return -EINVAL; } + if (intel_pt_too_many_aux_output(evlist)) + return -EINVAL; + if (!opts->full_auxtrace) return 0; diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 4c52204868d8..ea504fa9b623 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -2894,6 +2894,22 @@ static int intel_pt_synth_events(struct intel_pt *pt, return 0; } +static void intel_pt_setup_pebs_events(struct intel_pt *pt) +{ + struct evsel *evsel; + + if (!pt->synth_opts.other_events) + return; + + evlist__for_each_entry(pt->session->evlist, evsel) { + if (evsel->core.attr.aux_output && evsel->id) { + pt->sample_pebs = true; + pt->pebs_evsel = evsel; + return; + } + } +} + static struct evsel *intel_pt_find_sched_switch(struct evlist *evlist) { struct evsel *evsel; @@ -3263,6 +3279,8 @@ int intel_pt_process_auxtrace_info(union perf_event *event, if (err) goto err_delete_thread; + intel_pt_setup_pebs_events(pt); + err = auxtrace_queues__process_index(&pt->queues, session); if (err) goto err_delete_thread;
[tip:perf/core] perf tools: Add aux_output attribute flag
Commit-ID: 5a4b58e5d64ac7ebca175ffd8d74ca1b5cb0a01f Gitweb: https://git.kernel.org/tip/5a4b58e5d64ac7ebca175ffd8d74ca1b5cb0a01f Author: Adrian Hunter AuthorDate: Tue, 6 Aug 2019 11:46:02 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Wed, 14 Aug 2019 10:59:59 -0300 perf tools: Add aux_output attribute flag Add aux_output attribute flag to match the kernel's perf_event.h file. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20190806084606.4021-4-alexander.shish...@linux.intel.com Signed-off-by: Alexander Shishkin Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/perf_event.h | 3 ++- tools/perf/util/evsel.c | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 7198ddd0c6b1..bb7b271397a6 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -374,7 +374,8 @@ struct perf_event_attr { namespaces : 1, /* include namespaces data */ ksymbol: 1, /* include ksymbol events */ bpf_event : 1, /* include bpf events */ - __reserved_1 : 33; + aux_output : 1, /* generate AUX records instead of events */ + __reserved_1 : 32; union { __u32 wakeup_events;/* wakeup every n events */ diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 64bc32ed6dfa..897a97af2d81 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1587,6 +1587,7 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, PRINT_ATTRf(namespaces, p_unsigned); PRINT_ATTRf(ksymbol, p_unsigned); PRINT_ATTRf(bpf_event, p_unsigned); + PRINT_ATTRf(aux_output, p_unsigned); PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned); PRINT_ATTRf(bp_type, p_unsigned);
[tip:perf/urgent] perf db-export: Fix thread__exec_comm()
Commit-ID: 3de7ae0b2a1d86dbb23d0cb135150534fdb2e836 Gitweb: https://git.kernel.org/tip/3de7ae0b2a1d86dbb23d0cb135150534fdb2e836 Author: Adrian Hunter AuthorDate: Thu, 8 Aug 2019 09:48:23 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Thu, 8 Aug 2019 15:41:10 -0300 perf db-export: Fix thread__exec_comm() Threads synthesized from /proc have comms with a start time of zero, and not marked as "exec". Currently, there can be 2 such comms. The first is created by processing a synthesized fork event and is set to the parent's comm string, and the second by processing a synthesized comm event set to the thread's current comm string. In the absence of an "exec" comm, thread__exec_comm() picks the last (oldest) comm, which, in the case above, is the parent's comm string. For a main thread, that is very probably wrong. Use the second-to-last in that case. This affects only db-export because it is the only user of thread__exec_comm(). Example: $ sudo perf record -a -o pt-a-sleep-1 -e intel_pt//u -- sleep 1 $ sudo chown ahunter pt-a-sleep-1 Before: $ perf script -i pt-a-sleep-1 --itrace=bep -s tools/perf/scripts/python/export-to-sqlite.py pt-a-sleep-1.db branches calls $ sqlite3 -header -column pt-a-sleep-1.db 'select * from comm_threads_view' comm_id command thread_id pid tid -- -- -- -- -- 1 swapper 1 0 0 2 rcu_sched 2 10 10 3 kthreadd3 78 78 5 sudo4 15180 15180 5 sudo5 15180 15182 7 kworker/4: 6 10335 10335 8 kthreadd7 55 55 10 systemd 8 865 865 10 systemd 9 865 875 13 perf10 15181 15181 15 sleep 10 15181 15181 16 kworker/3: 11 14179 14179 17 kthreadd12 29376 29376 19 systemd 13 746 746 21 systemd 14 401 401 23 systemd 15 879 879 23 systemd 16 879 945 25 kthreadd17 556 556 27 kworker/u1 18 14136 14136 28 kworker/u1 19 15021 15021 29 kthreadd20 509 509 31 systemd 21 836 836 31 systemd 22 836 967 33 systemd 23 11481148 33 systemd 24 11481163 35 kworker/2: 25 17988 17988 36 kworker/0: 26 13478 13478 After: $ perf script -i pt-a-sleep-1 --itrace=bep -s 
tools/perf/scripts/python/export-to-sqlite.py pt-a-sleep-1b.db branches calls $ sqlite3 -header -column pt-a-sleep-1b.db 'select * from comm_threads_view' comm_id command thread_id pid tid -- -- -- -- -- 1 swapper 1 0 0 2 rcu_sched 2 10 10 3 kswapd0 3 78 78 4 perf4 15180 15180 4 perf5 15180 15182 6 kworker/4: 6 10335 10335 7 kcompactd0 7 55 55 8 accounts-d 8 865 865 8 accounts-d 9 865 875 10 perf10 15181 15181 12 sleep 10 15181 15181 13 kworker/3: 11 14179 14179 14 kworker/1: 12 29376 29376 15 haveged 13 746 746 16 systemd-jo 14 401 401 17 NetworkMan 15 879 879 17 NetworkMan 16 879 945 19 irq/131-iw 17 556 556 20 kworker/u1 18 14136 14136 21 kworker/u1 19 15021 15021 22 kworker/u1 20 509 509 23 thermald21 836 836 23 thermald22 836 967 25 unity-sett 23 11481148 25 unity-sett 24 11481163 27 kworker/2: 25 17988 17988 28 kworker/0: 26 13478 13478 Signed-off-by: Adrian Hunter Cc: Jiri Olsa Cc: sta...@vger.kernel.org Fixes: 65de51f93ebf ("perf tools: Identify which comms are from exec") Link: http://lkml.kernel.org/r/20190808064823.14846-1-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/thread.c | 12 +++- 1
[tip:perf/urgent] perf scripts python: export-to-postgresql.py: Export switch events
Commit-ID: 56789f3dc127d4f8c07ce2bb48629ba75e8ef16c Gitweb: https://git.kernel.org/tip/56789f3dc127d4f8c07ce2bb48629ba75e8ef16c Author: Adrian Hunter AuthorDate: Wed, 10 Jul 2019 11:58:10 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Wed, 10 Jul 2019 13:05:12 -0300 perf scripts python: export-to-postgresql.py: Export switch events Export switch events to a new table 'context_switches' and create a view 'context_switches_view'. The table and view will show automatically in the exported-sql-viewer.py script. If the table ends up empty, then it and the view are dropped. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190710085810.1650-22-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/export-to-postgresql.py | 51 +++ 1 file changed, 51 insertions(+) diff --git a/tools/perf/scripts/python/export-to-postgresql.py b/tools/perf/scripts/python/export-to-postgresql.py index 13205e4e5b3b..7bd73a904b4e 100644 --- a/tools/perf/scripts/python/export-to-postgresql.py +++ b/tools/perf/scripts/python/export-to-postgresql.py @@ -482,6 +482,17 @@ do_query(query, 'CREATE TABLE pwrx (' 'last_cstateinteger,' 'wake_reasoninteger)') +do_query(query, 'CREATE TABLE context_switches (' + 'id bigint NOT NULL,' + 'machine_id bigint,' + 'time bigint,' + 'cpuinteger,' + 'thread_out_id bigint,' + 'comm_out_idbigint,' + 'thread_in_id bigint,' + 'comm_in_id bigint,' + 'flags integer)') + do_query(query, 'CREATE VIEW machines_view AS ' 'SELECT ' 'id,' @@ -695,6 +706,29 @@ do_query(query, 'CREATE VIEW power_events_view AS ' ' INNER JOIN selected_events ON selected_events.id = samples.evsel_id' ' ORDER BY samples.id') +do_query(query, 'CREATE VIEW context_switches_view AS ' + 'SELECT ' + 'context_switches.id,' + 'context_switches.machine_id,' + 'context_switches.time,' + 'context_switches.cpu,' + 'th_out.pid AS pid_out,' + 'th_out.tid AS tid_out,' + 'comm_out.comm AS comm_out,' + 'th_in.pid AS pid_in,' + 'th_in.tid AS 
tid_in,' + 'comm_in.comm AS comm_in,' + 'CASE WHEN context_switches.flags = 0 THEN \'in\'' + ' WHEN context_switches.flags = 1 THEN \'out\'' + ' WHEN context_switches.flags = 3 THEN \'out preempt\'' + ' ELSE CAST ( context_switches.flags AS VARCHAR(11) )' + 'END AS flags' + ' FROM context_switches' + ' INNER JOIN threads AS th_out ON th_out.id = context_switches.thread_out_id' + ' INNER JOIN threads AS th_in ON th_in.id= context_switches.thread_in_id' + ' INNER JOIN comms AS comm_out ON comm_out.id = context_switches.comm_out_id' + ' INNER JOIN comms AS comm_in ON comm_in.id = context_switches.comm_in_id') + file_header = struct.pack("!11sii", b"PGCOPY\n\377\r\n\0", 0, 0) file_trailer = b"\377\377" @@ -759,6 +793,7 @@ mwait_file = open_output_file("mwait_table.bin") pwre_file = open_output_file("pwre_table.bin") exstop_file= open_output_file("exstop_table.bin") pwrx_file = open_output_file("pwrx_table.bin") +context_switches_file = open_output_file("context_switches_table.bin") def trace_begin(): printdate("Writing to intermediate files...") @@ -807,6 +842,7 @@ def trace_end(): copy_output_file(pwre_file, "pwre") copy_output_file(exstop_file, "exstop") copy_output_file(pwrx_file, "pwrx") + copy_output_file(context_switches_file, "context_switches") printdate("Removing intermediate files...") remove_output_file(evsel_file) @@ -828,6 +864,7 @@ def trace_end(): remove_output_file(pwre_file) remove_output_file(exstop_file) remove_output_file(pwrx_file) + remove_output_file(context_switches_file) os.rmdir(output_dir_name) printdate("Adding primary keys") do_query(query, 'ALTER TABLE selected_events ADD PRIMARY KEY (id)') @@ -849,6 +886,7 @@ def trace_end(): do_query(query, 'ALTER TABLE pwreADD PRIMARY KEY (id)') do_query(query, 'ALTER TABLE exstop ADD PRIMARY KEY (id)') do_query(query, 'ALTER TABLE pwrxADD PRIMARY KEY (id)') + do_query(query, 'ALTER TABLE context_switches ADD PRIMARY KEY (id)') printdate("Adding foreign keys") do_query(query, 'ALTER TABLE threads ' @@ 
-900,6 +938,12 @@ def trace_end(): 'ADD CONSTRAINT idfkFOREIGN KEY (id)
[tip:perf/urgent] perf scripts python: export-to-sqlite.py: Export switch events
Commit-ID: 37c1f991b1bcdbe268b99b22e265738f4209f4f4 Gitweb: https://git.kernel.org/tip/37c1f991b1bcdbe268b99b22e265738f4209f4f4 Author: Adrian Hunter AuthorDate: Wed, 10 Jul 2019 11:58:09 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Wed, 10 Jul 2019 12:37:35 -0300 perf scripts python: export-to-sqlite.py: Export switch events Export switch events to a new table 'context_switches' and create a view 'context_switches_view'. The table and view will show automatically in the exported-sql-viewer.py script. If the table ends up empty, then it and the view are dropped. Committer testing: Use the exported-sql-viewer.py and look at "Tables" -> "context_switches": id machine_id time cpu thread_out_id comm_out_id thread_in_id comm_in_id flags 1 1 187836111885918 71 12 2 3 2 1 187836111889369 71 12 2 0 3 1 187836112464618 72 31 1 1 4 1 187836112465511 72 31 1 0 Signed-off-by: Adrian Hunter Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190710085810.1650-21-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/export-to-sqlite.py | 41 +++ 1 file changed, 41 insertions(+) diff --git a/tools/perf/scripts/python/export-to-sqlite.py b/tools/perf/scripts/python/export-to-sqlite.py index 9156f6a1e5f0..8043a7272a56 100644 --- a/tools/perf/scripts/python/export-to-sqlite.py +++ b/tools/perf/scripts/python/export-to-sqlite.py @@ -306,6 +306,17 @@ do_query(query, 'CREATE TABLE pwrx (' 'last_cstateinteger,' 'wake_reasoninteger)') +do_query(query, 'CREATE TABLE context_switches (' + 'id integer NOT NULLPRIMARY KEY,' + 'machine_id bigint,' + 'time bigint,' + 'cpuinteger,' + 'thread_out_id bigint,' + 'comm_out_idbigint,' + 'thread_in_id bigint,' + 'comm_in_id bigint,' + 'flags integer)') + # printf was added to sqlite in version 3.8.3 sqlite_has_printf = False try: @@ -530,6 +541,29 @@ do_query(query, 'CREATE VIEW power_events_view AS ' ' INNER JOIN selected_events ON selected_events.id = evsel_id' ' WHERE 
selected_events.name IN (\'cbr\',\'mwait\',\'exstop\',\'pwre\',\'pwrx\')') +do_query(query, 'CREATE VIEW context_switches_view AS ' + 'SELECT ' + 'context_switches.id,' + 'context_switches.machine_id,' + 'context_switches.time,' + 'context_switches.cpu,' + 'th_out.pid AS pid_out,' + 'th_out.tid AS tid_out,' + 'comm_out.comm AS comm_out,' + 'th_in.pid AS pid_in,' + 'th_in.tid AS tid_in,' + 'comm_in.comm AS comm_in,' + 'CASE WHEN context_switches.flags = 0 THEN \'in\'' + ' WHEN context_switches.flags = 1 THEN \'out\'' + ' WHEN context_switches.flags = 3 THEN \'out preempt\'' + ' ELSE context_switches.flags ' + 'END AS flags' + ' FROM context_switches' + ' INNER JOIN threads AS th_out ON th_out.id = context_switches.thread_out_id' + ' INNER JOIN threads AS th_in ON th_in.id= context_switches.thread_in_id' + ' INNER JOIN comms AS comm_out ON comm_out.id = context_switches.comm_out_id' + ' INNER JOIN comms AS comm_in ON comm_in.id = context_switches.comm_in_id') + do_query(query, 'END TRANSACTION') evsel_query = QSqlQuery(db) @@ -571,6 +605,8 @@ exstop_query = QSqlQuery(db) exstop_query.prepare("INSERT INTO exstop VALUES (?, ?)") pwrx_query = QSqlQuery(db) pwrx_query.prepare("INSERT INTO pwrx VALUES (?, ?, ?, ?)") +context_switch_query = QSqlQuery(db) +context_switch_query.prepare("INSERT INTO context_switches VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)") def trace_begin(): printdate("Writing records...") @@ -620,6 +656,8 @@ def trace_end(): drop("pwrx") if is_table_empty("cbr"): drop("cbr") + if is_table_empty("context_switches"): + drop("context_switches") if (unhandled_count): printdate("Warning: ", unhandled_count, " unhandled events") @@ -753,3 +791,6 @@ def synth_data(id, config, raw_buf, *x): pwrx(id, raw_buf) elif config == 5: cbr(id, raw_buf) + +def context_switch_table(*x): + bind_exec(context_switch_query, 9, x)
[tip:perf/urgent] perf db-export: Export switch events
Commit-ID: abde8722d9b0a317935506d9824e26f1aef6c24a Gitweb: https://git.kernel.org/tip/abde8722d9b0a317935506d9824e26f1aef6c24a Author: Adrian Hunter AuthorDate: Wed, 10 Jul 2019 11:58:08 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Wed, 10 Jul 2019 12:35:38 -0300 perf db-export: Export switch events Export details of switch events including the threads and their current comms. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190710085810.1650-20-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/db-export.c| 89 ++ tools/perf/util/db-export.h| 8 ++ .../util/scripting-engines/trace-event-python.c| 41 ++ 3 files changed, 138 insertions(+) diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c index e6a9c450133e..ffbb3e7d3288 100644 --- a/tools/perf/util/db-export.c +++ b/tools/perf/util/db-export.c @@ -519,3 +519,92 @@ int db_export__call_return(struct db_export *dbe, struct call_return *cr, return 0; } + +static int db_export__pid_tid(struct db_export *dbe, struct machine *machine, + pid_t pid, pid_t tid, u64 *db_id, + struct comm **comm_ptr, bool *is_idle) +{ + struct thread *thread = machine__find_thread(machine, pid, tid); + struct thread *main_thread; + int err = 0; + + if (!thread || !thread->comm_set) + goto out_put; + + *is_idle = !thread->pid_ && !thread->tid; + + main_thread = thread__main_thread(machine, thread); + + err = db_export__threads(dbe, thread, main_thread, machine, comm_ptr); + + *db_id = thread->db_id; + + thread__put(main_thread); +out_put: + thread__put(thread); + + return err; +} + +int db_export__switch(struct db_export *dbe, union perf_event *event, + struct perf_sample *sample, struct machine *machine) +{ + bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT; + bool out_preempt = out && + (event->header.misc & PERF_RECORD_MISC_SWITCH_OUT_PREEMPT); + int flags = out | (out_preempt << 1); + bool is_idle_a = false, is_idle_b = false; + u64 th_a_id = 
0, th_b_id = 0; + u64 comm_out_id, comm_in_id; + struct comm *comm_a = NULL; + struct comm *comm_b = NULL; + u64 th_out_id, th_in_id; + u64 db_id; + int err; + + err = db_export__machine(dbe, machine); + if (err) + return err; + + err = db_export__pid_tid(dbe, machine, sample->pid, sample->tid, +&th_a_id, &comm_a, &is_idle_a); + if (err) + return err; + + if (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE) { + pid_t pid = event->context_switch.next_prev_pid; + pid_t tid = event->context_switch.next_prev_tid; + + err = db_export__pid_tid(dbe, machine, pid, tid, &th_b_id, +&comm_b, &is_idle_b); + if (err) + return err; + } + + /* +* Do not export if both threads are unknown (i.e. not being traced), +* or one is unknown and the other is the idle task. +*/ + if ((!th_a_id || is_idle_a) && (!th_b_id || is_idle_b)) + return 0; + + db_id = ++dbe->context_switch_last_db_id; + + if (out) { + th_out_id = th_a_id; + th_in_id= th_b_id; + comm_out_id = comm_a ? comm_a->db_id : 0; + comm_in_id = comm_b ? comm_b->db_id : 0; + } else { + th_out_id = th_b_id; + th_in_id= th_a_id; + comm_out_id = comm_b ? comm_b->db_id : 0; + comm_in_id = comm_a ? comm_a->db_id : 0; + } + + if (dbe->export_context_switch) + return dbe->export_context_switch(dbe, db_id, machine, sample, + th_out_id, comm_out_id, + th_in_id, comm_in_id, flags); + return 0; +} diff --git a/tools/perf/util/db-export.h b/tools/perf/util/db-export.h index f5f0865f07e1..ba1f62a5fe10 100644 --- a/tools/perf/util/db-export.h +++ b/tools/perf/util/db-export.h @@ -57,6 +57,11 @@ struct db_export { int (*export_call_path)(struct db_export *dbe, struct call_path *cp); int (*export_call_return)(struct db_export *dbe, struct call_return *cr); + int (*export_context_switch)(struct db_export *dbe, u64 db_id, +struct machine *machine, +struct perf_sample *sample, +u64 th_out_id, u64 comm_out_id, +u64 th_in_id, u64 comm_in_id, int flags);
[tip:perf/urgent] perf db-export: Factor out db_export__threads()
Commit-ID: b3694e6c0a05383891546c6e3cdef8659d50b653 Gitweb: https://git.kernel.org/tip/b3694e6c0a05383891546c6e3cdef8659d50b653 Author: Adrian Hunter AuthorDate: Wed, 10 Jul 2019 11:58:07 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Wed, 10 Jul 2019 12:35:18 -0300 perf db-export: Factor out db_export__threads() In preparation for exporting switch events, factor out db_export__threads(). Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190710085810.1650-19-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/db-export.c | 82 ++--- 1 file changed, 48 insertions(+), 34 deletions(-) diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c index 5057fdd7f62d..e6a9c450133e 100644 --- a/tools/perf/util/db-export.c +++ b/tools/perf/util/db-export.c @@ -286,50 +286,32 @@ int db_export__branch_type(struct db_export *dbe, u32 branch_type, return 0; } -int db_export__sample(struct db_export *dbe, union perf_event *event, - struct perf_sample *sample, struct perf_evsel *evsel, - struct addr_location *al) +static int db_export__threads(struct db_export *dbe, struct thread *thread, + struct thread *main_thread, + struct machine *machine, struct comm **comm_ptr) { - struct thread *thread = al->thread; - struct export_sample es = { - .event = event, - .sample = sample, - .evsel = evsel, - .al = al, - }; - struct thread *main_thread; struct comm *comm = NULL; struct comm *curr_comm; int err; - err = db_export__evsel(dbe, evsel); - if (err) - return err; - - err = db_export__machine(dbe, al->machine); - if (err) - return err; - - main_thread = thread__main_thread(al->machine, thread); if (main_thread) { /* * A thread has a reference to the main thread, so export the * main thread first. 
*/ - err = db_export__thread(dbe, main_thread, al->machine, - main_thread); + err = db_export__thread(dbe, main_thread, machine, main_thread); if (err) - goto out_put; + return err; /* * Export comm before exporting the non-main thread because * db_export__comm_thread() can be called further below. */ - comm = machine__thread_exec_comm(al->machine, main_thread); + comm = machine__thread_exec_comm(machine, main_thread); if (comm) { err = db_export__exec_comm(dbe, comm, main_thread); if (err) - goto out_put; - es.comm_db_id = comm->db_id; + return err; + *comm_ptr = comm; } } @@ -340,23 +322,55 @@ int db_export__sample(struct db_export *dbe, union perf_event *event, */ bool export_comm_thread = comm && !thread->db_id; - err = db_export__thread(dbe, thread, al->machine, main_thread); + err = db_export__thread(dbe, thread, machine, main_thread); if (err) - goto out_put; + return err; if (export_comm_thread) { err = db_export__comm_thread(dbe, comm, thread); if (err) - goto out_put; + return err; } } curr_comm = thread__comm(thread); - if (curr_comm) { - err = db_export__comm(dbe, curr_comm, thread); - if (err) - goto out_put; - } + if (curr_comm) + return db_export__comm(dbe, curr_comm, thread); + + return 0; +} + +int db_export__sample(struct db_export *dbe, union perf_event *event, + struct perf_sample *sample, struct perf_evsel *evsel, + struct addr_location *al) +{ + struct thread *thread = al->thread; + struct export_sample es = { + .event = event, + .sample = sample, + .evsel = evsel, + .al = al, + }; + struct thread *main_thread; + struct comm *comm = NULL; + int err; + + err = db_export__evsel(dbe, evsel); + if (err) + return err; + + err = db_export__machine(dbe, al->machine); + if (err) + return err; + + main_thread = thread__main_thread(al->machine, thread); + + err = db_export__threads(dbe,
[tip:perf/urgent] perf scripts python: exported-sql-viewer.py: Use new 'has_calls' column
Commit-ID: 26c11206f433ea507a7541f48cb472b85870577e Gitweb: https://git.kernel.org/tip/26c11206f433ea507a7541f48cb472b85870577e Author: Adrian Hunter AuthorDate: Wed, 10 Jul 2019 11:58:05 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Wed, 10 Jul 2019 12:31:25 -0300 perf scripts python: exported-sql-viewer.py: Use new 'has_calls' column If the new 'has_calls' column is present, use it with the call graph and call tree to select only comms that have calls. Committer testing: Just started the exported-sql-viewer.py and accessed all the reports, no backtraces. Signed-off-by: Adrian Hunter Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190710085810.1650-17-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/exported-sql-viewer.py | 10 -- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py index dbbd7a5d9b60..61b3911d91e6 100755 --- a/tools/perf/scripts/python/exported-sql-viewer.py +++ b/tools/perf/scripts/python/exported-sql-viewer.py @@ -623,8 +623,11 @@ class CallGraphRootItem(CallGraphLevelItemBase): super(CallGraphRootItem, self).__init__(glb, params, 0, None) self.dbid = 0 self.query_done = True + if_has_calls = "" + if IsSelectable(glb.db, "comms", columns = "has_calls"): + if_has_calls = " WHERE has_calls = TRUE" query = QSqlQuery(glb.db) - QueryExec(query, "SELECT id, comm FROM comms") + QueryExec(query, "SELECT id, comm FROM comms" + if_has_calls) while query.next(): if not query.value(0): continue @@ -900,8 +903,11 @@ class CallTreeRootItem(CallGraphLevelItemBase): super(CallTreeRootItem, self).__init__(glb, params, 0, None) self.dbid = 0 self.query_done = True + if_has_calls = "" + if IsSelectable(glb.db, "comms", columns = "has_calls"): + if_has_calls = " WHERE has_calls = TRUE" query = QSqlQuery(glb.db) - QueryExec(query, "SELECT id, comm FROM comms") + 
QueryExec(query, "SELECT id, comm FROM comms" + if_has_calls) while query.next(): if not query.value(0): continue
[tip:perf/urgent] perf script: Add scripting operation process_switch()
Commit-ID: 5bf83c29a0ad2e78683c318b607539dbadbf7a3b Gitweb: https://git.kernel.org/tip/5bf83c29a0ad2e78683c318b607539dbadbf7a3b Author: Adrian Hunter AuthorDate: Wed, 10 Jul 2019 11:58:06 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Wed, 10 Jul 2019 12:34:09 -0300 perf script: Add scripting operation process_switch() Add scripting operation process_switch() to process switch events. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190710085810.1650-18-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 8 +++- tools/perf/util/trace-event.h | 3 +++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 79367087bd18..8f24865596af 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -2289,6 +2289,12 @@ static int process_switch_event(struct perf_tool *tool, if (perf_event__process_switch(tool, event, sample, machine) < 0) return -1; + if (scripting_ops && scripting_ops->process_switch) + scripting_ops->process_switch(event, sample, machine); + + if (!script->show_switch_events) + return 0; + thread = machine__findnew_thread(machine, sample->pid, sample->tid); if (thread == NULL) { @@ -2467,7 +2473,7 @@ static int __cmd_script(struct perf_script *script) script->tool.mmap = process_mmap_event; script->tool.mmap2 = process_mmap2_event; } - if (script->show_switch_events) + if (script->show_switch_events || (scripting_ops && scripting_ops->process_switch)) script->tool.context_switch = process_switch_event; if (script->show_namespace_events) script->tool.namespaces = process_namespaces_event; diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index d9b0a942090a..c7002fe11673 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -81,6 +81,9 @@ struct scripting_ops { struct perf_sample *sample, struct perf_evsel *evsel, struct addr_location *al); + 
void (*process_switch)(union perf_event *event, + struct perf_sample *sample, + struct machine *machine); void (*process_stat)(struct perf_stat_config *config, struct perf_evsel *evsel, u64 tstamp); void (*process_stat_interval)(u64 tstamp);
[tip:perf/urgent] perf scripts python: exported-sql-viewer.py: Remove redundant semi-colons
Commit-ID: 266887291cac7f4020b5c83d2af9a13aece44a74 Gitweb: https://git.kernel.org/tip/266887291cac7f4020b5c83d2af9a13aece44a74 Author: Adrian Hunter AuthorDate: Wed, 10 Jul 2019 11:58:04 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Wed, 10 Jul 2019 12:31:01 -0300 perf scripts python: exported-sql-viewer.py: Remove redundant semi-colons Remove redundant semi-colons added inadvertently. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190710085810.1650-16-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/exported-sql-viewer.py | 24 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py index 6e7934f2ac9a..dbbd7a5d9b60 100755 --- a/tools/perf/scripts/python/exported-sql-viewer.py +++ b/tools/perf/scripts/python/exported-sql-viewer.py @@ -392,7 +392,7 @@ class FindBar(): self.hbox.addWidget(self.close_button) self.bar = QWidget() - self.bar.setLayout(self.hbox); + self.bar.setLayout(self.hbox) self.bar.hide() def Widget(self): @@ -470,7 +470,7 @@ class CallGraphLevelItemBase(object): self.params = params self.row = row self.parent_item = parent_item - self.query_done = False; + self.query_done = False self.child_count = 0 self.child_items = [] if parent_item: @@ -517,7 +517,7 @@ class CallGraphLevelTwoPlusItemBase(CallGraphLevelItemBase): self.time = time def Select(self): - self.query_done = True; + self.query_done = True query = QSqlQuery(self.glb.db) if self.params.have_ipc: ipc_str = ", SUM(insn_count), SUM(cyc_count)" @@ -604,7 +604,7 @@ class CallGraphLevelOneItem(CallGraphLevelItemBase): self.dbid = comm_id def Select(self): - self.query_done = True; + self.query_done = True query = QSqlQuery(self.glb.db) QueryExec(query, "SELECT thread_id, pid, tid" " FROM comm_threads" @@ -622,7 +622,7 @@ class CallGraphRootItem(CallGraphLevelItemBase): def __init__(self, glb, params): 
super(CallGraphRootItem, self).__init__(glb, params, 0, None) self.dbid = 0 - self.query_done = True; + self.query_done = True query = QSqlQuery(glb.db) QueryExec(query, "SELECT id, comm FROM comms") while query.next(): @@ -793,7 +793,7 @@ class CallTreeLevelTwoPlusItemBase(CallGraphLevelItemBase): self.time = time def Select(self): - self.query_done = True; + self.query_done = True if self.calls_id == 0: comm_thread = " AND comm_id = " + str(self.comm_id) + " AND thread_id = " + str(self.thread_id) else: @@ -881,7 +881,7 @@ class CallTreeLevelOneItem(CallGraphLevelItemBase): self.dbid = comm_id def Select(self): - self.query_done = True; + self.query_done = True query = QSqlQuery(self.glb.db) QueryExec(query, "SELECT thread_id, pid, tid" " FROM comm_threads" @@ -899,7 +899,7 @@ class CallTreeRootItem(CallGraphLevelItemBase): def __init__(self, glb, params): super(CallTreeRootItem, self).__init__(glb, params, 0, None) self.dbid = 0 - self.query_done = True; + self.query_done = True query = QSqlQuery(glb.db) QueryExec(query, "SELECT id, comm FROM comms") while query.next(): @@ -971,7 +971,7 @@ class VBox(): def __init__(self, w1, w2, w3=None): self.vbox = QWidget() - self.vbox.setLayout(QVBoxLayout()); + self.vbox.setLayout(QVBoxLayout()) self.vbox.layout().setContentsMargins(0, 0, 0, 0) @@ -1391,7 +1391,7 @@ class FetchMoreRecordsBar(): self.hbox.addWidget(self.close_button) self.bar = QWidget() - self.bar.setLayout(self.hbox); + self.bar.setLayout(self.hbox) self.bar.show() self.in_progress = False @@ -2206,7 +2206,7 @@ class ReportDialogBase(QDialog): self.vbox.addLayout(self.grid) self.vbox.addLayout(self.hbox) - self.setLayout(self.vbox); + self.setLayout(self.vbox) def Ok(self): vars = self.report_vars @@ -3139,7 +3139,7 @@ class AboutDialog(QDialo
[tip:perf/urgent] perf scripts python: export-to-postgresql.py: Add has_calls column to comms table
Commit-ID: d9efc1d25214da500d6592095b542b32c15459df Gitweb: https://git.kernel.org/tip/d9efc1d25214da500d6592095b542b32c15459df Author: Adrian Hunter AuthorDate: Wed, 10 Jul 2019 11:58:03 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Wed, 10 Jul 2019 12:25:05 -0300 perf scripts python: export-to-postgresql.py: Add has_calls column to comms table Now that a thread's current comm is exported, it shows up in the call graph and call tree even if it has no calls. That can happen because the calls are recorded against the main thread's initial comm. Add a table column to make it easy for the exported-sql-viewer.py script to select only comms with calls. Committer testing: $ rm -f simple-retpoline.db $ sudo ~acme/bin/perf script -i simple-retpoline.perf.data --itrace=be -s ~/libexec/perf-core/scripts/python/export-to-sqlite.py simple-retpoline.db branches calls 2019-07-10 12:25:33.200529 Creating database ... 2019-07-10 12:25:33.211548 Writing records... 2019-07-10 12:25:33.549630 Adding indexes 2019-07-10 12:25:33.560715 Dropping unused tables 2019-07-10 12:25:33.580201 Done $ sha256sum tools/perf/scripts/python/export-to-sqlite.py ~/libexec/perf-core/scripts/python/export-to-sqlite.py 2922b642c392004dffa1d8789296478c85904623f5895bcb9b6cbf33e3ca999f tools/perf/scripts/python/export-to-sqlite.py 2922b642c392004dffa1d8789296478c85904623f5895bcb9b6cbf33e3ca999f /home/acme/libexec/perf-core/scripts/python/export-to-sqlite.py $ $ sqlite3 simple-retpoline.db SQLite version 3.26.0 2018-12-01 12:34:55 Enter ".help" for usage hints. 
sqlite> .schema comms CREATE TABLE comms (id integer NOT NULL PRIMARY KEY,comm varchar(16),c_thread_id bigint,c_time bigint,exec_flag boolean, has_calls boolean); sqlite> select id,has_calls from comms; 0|1 1|1 sqlite> select distinct comm_id from calls; 0 1 sqlite> Signed-off-by: Adrian Hunter Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190710085810.1650-15-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/export-to-postgresql.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/scripts/python/export-to-postgresql.py b/tools/perf/scripts/python/export-to-postgresql.py index 01f37877f5bb..13205e4e5b3b 100644 --- a/tools/perf/scripts/python/export-to-postgresql.py +++ b/tools/perf/scripts/python/export-to-postgresql.py @@ -886,6 +886,8 @@ def trace_end(): 'ADD CONSTRAINT parent_call_pathfk FOREIGN KEY (parent_call_path_id) REFERENCES call_paths (id)') do_query(query, 'CREATE INDEX pcpid_idx ON calls (parent_call_path_id)') do_query(query, 'CREATE INDEX pid_idx ON calls (parent_id)') + do_query(query, 'ALTER TABLE comms ADD has_calls boolean') + do_query(query, 'UPDATE comms SET has_calls = TRUE WHERE comms.id IN (SELECT DISTINCT comm_id FROM calls)') do_query(query, 'ALTER TABLE ptwrite ' 'ADD CONSTRAINT idfkFOREIGN KEY (id) REFERENCES samples (id)') do_query(query, 'ALTER TABLE cbr '
[tip:perf/urgent] perf scripts python: export-to-sqlite.py: Add has_calls column to comms table
Commit-ID: ecc8c9984dae9812a10936cb9c74957b68075e07 Gitweb: https://git.kernel.org/tip/ecc8c9984dae9812a10936cb9c74957b68075e07 Author: Adrian Hunter AuthorDate: Wed, 10 Jul 2019 11:58:02 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Wed, 10 Jul 2019 12:23:55 -0300 perf scripts python: export-to-sqlite.py: Add has_calls column to comms table Now that a thread's current comm is exported, it shows up in the call graph and call tree even if it has no calls. That can happen because the calls are recorded against the main thread's initial comm. Add a table column to make it easy for the exported-sql-viewer.py script to select only comms with calls. Committer notes: Running the export-to-sqlite.py worked without warnings and using the exported-sql-viewer.py worked as before. Signed-off-by: Adrian Hunter Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190710085810.1650-14-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/export-to-sqlite.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/scripts/python/export-to-sqlite.py b/tools/perf/scripts/python/export-to-sqlite.py index 97aa66dd2fe1..9156f6a1e5f0 100644 --- a/tools/perf/scripts/python/export-to-sqlite.py +++ b/tools/perf/scripts/python/export-to-sqlite.py @@ -606,6 +606,8 @@ def trace_end(): if perf_db_export_calls: do_query(query, 'CREATE INDEX pcpid_idx ON calls (parent_call_path_id)') do_query(query, 'CREATE INDEX pid_idx ON calls (parent_id)') + do_query(query, 'ALTER TABLE comms ADD has_calls boolean') + do_query(query, 'UPDATE comms SET has_calls = 1 WHERE comms.id IN (SELECT DISTINCT comm_id FROM calls)') printdate("Dropping unused tables") if is_table_empty("ptwrite"):
[tip:perf/urgent] perf db-export: Also export thread's current comm
Commit-ID: 4650c7bed79582c74452d284e45d5b76987c0ef3 Gitweb: https://git.kernel.org/tip/4650c7bed79582c74452d284e45d5b76987c0ef3 Author: Adrian Hunter AuthorDate: Wed, 10 Jul 2019 11:58:01 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Wed, 10 Jul 2019 12:14:07 -0300 perf db-export: Also export thread's current comm Currently, the initial comm of the main thread is exported. Export also a thread's current comm. That better supports the tracing of multi-threaded applications that set different comms for different threads to make it easier to distinguish them. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190710085810.1650-13-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/db-export.c | 8 1 file changed, 8 insertions(+) diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c index b1e581c13963..5057fdd7f62d 100644 --- a/tools/perf/util/db-export.c +++ b/tools/perf/util/db-export.c @@ -299,6 +299,7 @@ int db_export__sample(struct db_export *dbe, union perf_event *event, }; struct thread *main_thread; struct comm *comm = NULL; + struct comm *curr_comm; int err; err = db_export__evsel(dbe, evsel); @@ -350,6 +351,13 @@ int db_export__sample(struct db_export *dbe, union perf_event *event, } } + curr_comm = thread__comm(thread); + if (curr_comm) { + err = db_export__comm(dbe, curr_comm, thread); + if (err) + goto out_put; + } + es.db_id = ++dbe->sample_last_db_id; err = db_ids_from_al(dbe, al, &es.dso_db_id, &es.sym_db_id, &es.offset);
[tip:perf/urgent] perf db-export: Factor out db_export__comm()
Commit-ID: 80859c947a1eb170927d03e713abf7550a3d8766 Gitweb: https://git.kernel.org/tip/80859c947a1eb170927d03e713abf7550a3d8766 Author: Adrian Hunter AuthorDate: Wed, 10 Jul 2019 11:58:00 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Wed, 10 Jul 2019 12:13:51 -0300 perf db-export: Factor out db_export__comm() In preparation for exporting the current comm for a thread, factor out db_export__comm(). Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190710085810.1650-12-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/db-export.c | 30 +++--- tools/perf/util/db-export.h | 2 ++ 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c index b0504d3eb130..b1e581c13963 100644 --- a/tools/perf/util/db-export.c +++ b/tools/perf/util/db-export.c @@ -78,6 +78,26 @@ int db_export__thread(struct db_export *dbe, struct thread *thread, return 0; } +static int __db_export__comm(struct db_export *dbe, struct comm *comm, +struct thread *thread) +{ + comm->db_id = ++dbe->comm_last_db_id; + + if (dbe->export_comm) + return dbe->export_comm(dbe, comm, thread); + + return 0; +} + +int db_export__comm(struct db_export *dbe, struct comm *comm, + struct thread *thread) +{ + if (comm->db_id) + return 0; + + return __db_export__comm(dbe, comm, thread); +} + /* * Export the "exec" comm. The "exec" comm is the program / application command * name at the time it first executes. It is used to group threads for the same @@ -92,13 +112,9 @@ int db_export__exec_comm(struct db_export *dbe, struct comm *comm, if (comm->db_id) return 0; - comm->db_id = ++dbe->comm_last_db_id; - - if (dbe->export_comm) { - err = dbe->export_comm(dbe, comm, main_thread); - if (err) - return err; - } + err = __db_export__comm(dbe, comm, main_thread); + if (err) + return err; /* * Record the main thread for this comm. 
Note that the main thread can diff --git a/tools/perf/util/db-export.h b/tools/perf/util/db-export.h index 29f7c3b035a7..f5f0865f07e1 100644 --- a/tools/perf/util/db-export.h +++ b/tools/perf/util/db-export.h @@ -77,6 +77,8 @@ int db_export__evsel(struct db_export *dbe, struct perf_evsel *evsel); int db_export__machine(struct db_export *dbe, struct machine *machine); int db_export__thread(struct db_export *dbe, struct thread *thread, struct machine *machine, struct thread *main_thread); +int db_export__comm(struct db_export *dbe, struct comm *comm, + struct thread *thread); int db_export__exec_comm(struct db_export *dbe, struct comm *comm, struct thread *main_thread); int db_export__comm_thread(struct db_export *dbe, struct comm *comm,
[tip:perf/urgent] perf scripts python: export-to-postgresql.py: Export comm details
Commit-ID: 8534b5de81802a82b13fe05acc3e749e3baf980e Gitweb: https://git.kernel.org/tip/8534b5de81802a82b13fe05acc3e749e3baf980e Author: Adrian Hunter AuthorDate: Wed, 10 Jul 2019 11:57:59 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Wed, 10 Jul 2019 12:13:36 -0300 perf scripts python: export-to-postgresql.py: Export comm details Add table columns for thread id, comm start time and exec flag. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190710085810.1650-11-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/export-to-postgresql.py | 15 ++- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/tools/perf/scripts/python/export-to-postgresql.py b/tools/perf/scripts/python/export-to-postgresql.py index 92713d93e956..01f37877f5bb 100644 --- a/tools/perf/scripts/python/export-to-postgresql.py +++ b/tools/perf/scripts/python/export-to-postgresql.py @@ -353,7 +353,10 @@ do_query(query, 'CREATE TABLE threads (' 'tidinteger)') do_query(query, 'CREATE TABLE comms (' 'id bigint NOT NULL,' - 'comm varchar(16))') + 'comm varchar(16),' + 'c_thread_idbigint,' + 'c_time bigint,' + 'exec_flag boolean)') do_query(query, 'CREATE TABLE comm_threads (' 'id bigint NOT NULL,' 'comm_idbigint,' @@ -763,7 +766,7 @@ def trace_begin(): evsel_table(0, "unknown") machine_table(0, 0, "unknown") thread_table(0, 0, 0, -1, -1) - comm_table(0, "unknown") + comm_table(0, "unknown", 0, 0, 0) dso_table(0, 0, "unknown", "unknown", "") symbol_table(0, 0, 0, 0, 0, "unknown") sample_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) @@ -851,6 +854,8 @@ def trace_end(): do_query(query, 'ALTER TABLE threads ' 'ADD CONSTRAINT machinefk FOREIGN KEY (machine_id) REFERENCES machines (id),' 'ADD CONSTRAINT processfk FOREIGN KEY (process_id) REFERENCES threads(id)') + do_query(query, 'ALTER TABLE comms ' + 'ADD CONSTRAINT threadfk FOREIGN KEY (c_thread_id) REFERENCES threads(id)') do_query(query, 
'ALTER TABLE comm_threads ' 'ADD CONSTRAINT commfk FOREIGN KEY (comm_id) REFERENCES comms (id),' 'ADD CONSTRAINT threadfk FOREIGN KEY (thread_id)REFERENCES threads(id)') @@ -935,11 +940,11 @@ def thread_table(thread_id, machine_id, process_id, pid, tid, *x): value = struct.pack("!hiqiqiq", 5, 8, thread_id, 8, machine_id, 8, process_id, 4, pid, 4, tid) thread_file.write(value) -def comm_table(comm_id, comm_str, *x): +def comm_table(comm_id, comm_str, thread_id, time, exec_flag, *x): comm_str = toserverstr(comm_str) n = len(comm_str) - fmt = "!hiqi" + str(n) + "s" - value = struct.pack(fmt, 2, 8, comm_id, n, comm_str) + fmt = "!hiqi" + str(n) + "s" + "iqiqiB" + value = struct.pack(fmt, 5, 8, comm_id, n, comm_str, 8, thread_id, 8, time, 1, exec_flag) comm_file.write(value) def comm_thread_table(comm_thread_id, comm_id, thread_id, *x):
[tip:perf/urgent] perf scripts python: export-to-sqlite.py: Export comm details
Commit-ID: 41085f2bdd5882632e7dd88d1e5b59b7eac2a2a9 Gitweb: https://git.kernel.org/tip/41085f2bdd5882632e7dd88d1e5b59b7eac2a2a9 Author: Adrian Hunter AuthorDate: Wed, 10 Jul 2019 11:57:58 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Wed, 10 Jul 2019 12:13:26 -0300 perf scripts python: export-to-sqlite.py: Export comm details Add table columns for thread id, comm start time and exec flag. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190710085810.1650-10-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/export-to-sqlite.py | 11 +++ 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tools/perf/scripts/python/export-to-sqlite.py b/tools/perf/scripts/python/export-to-sqlite.py index 021326c46285..97aa66dd2fe1 100644 --- a/tools/perf/scripts/python/export-to-sqlite.py +++ b/tools/perf/scripts/python/export-to-sqlite.py @@ -177,7 +177,10 @@ do_query(query, 'CREATE TABLE threads (' 'tidinteger)') do_query(query, 'CREATE TABLE comms (' 'id integer NOT NULLPRIMARY KEY,' - 'comm varchar(16))') + 'comm varchar(16),' + 'c_thread_idbigint,' + 'c_time bigint,' + 'exec_flag boolean)') do_query(query, 'CREATE TABLE comm_threads (' 'id integer NOT NULLPRIMARY KEY,' 'comm_idbigint,' @@ -536,7 +539,7 @@ machine_query.prepare("INSERT INTO machines VALUES (?, ?, ?)") thread_query = QSqlQuery(db) thread_query.prepare("INSERT INTO threads VALUES (?, ?, ?, ?, ?)") comm_query = QSqlQuery(db) -comm_query.prepare("INSERT INTO comms VALUES (?, ?)") +comm_query.prepare("INSERT INTO comms VALUES (?, ?, ?, ?, ?)") comm_thread_query = QSqlQuery(db) comm_thread_query.prepare("INSERT INTO comm_threads VALUES (?, ?, ?)") dso_query = QSqlQuery(db) @@ -576,7 +579,7 @@ def trace_begin(): evsel_table(0, "unknown") machine_table(0, 0, "unknown") thread_table(0, 0, 0, -1, -1) - comm_table(0, "unknown") + comm_table(0, "unknown", 0, 0, 0) dso_table(0, 0, "unknown", "unknown", "") symbol_table(0, 0, 0, 0, 0, 
"unknown") sample_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) @@ -642,7 +645,7 @@ def thread_table(*x): bind_exec(thread_query, 5, x) def comm_table(*x): - bind_exec(comm_query, 2, x) + bind_exec(comm_query, 5, x) def comm_thread_table(*x): bind_exec(comm_thread_query, 3, x)
[tip:perf/urgent] perf db-export: Export comm details
Commit-ID: 8ebf5cc0f6ce469d65ba2e8ce519dae34f0b3f50 Gitweb: https://git.kernel.org/tip/8ebf5cc0f6ce469d65ba2e8ce519dae34f0b3f50 Author: Adrian Hunter AuthorDate: Wed, 10 Jul 2019 11:57:57 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Wed, 10 Jul 2019 12:13:08 -0300 perf db-export: Export comm details In preparation for exporting the current comm for a thread, export comm thread id, start time and exec flag. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190710085810.1650-9-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/db-export.c| 2 +- tools/perf/util/db-export.h| 3 ++- tools/perf/util/scripting-engines/trace-event-python.c | 8 ++-- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c index 2c3a4ad68428..b0504d3eb130 100644 --- a/tools/perf/util/db-export.c +++ b/tools/perf/util/db-export.c @@ -95,7 +95,7 @@ int db_export__exec_comm(struct db_export *dbe, struct comm *comm, comm->db_id = ++dbe->comm_last_db_id; if (dbe->export_comm) { - err = dbe->export_comm(dbe, comm); + err = dbe->export_comm(dbe, comm, main_thread); if (err) return err; } diff --git a/tools/perf/util/db-export.h b/tools/perf/util/db-export.h index 811a678a910d..29f7c3b035a7 100644 --- a/tools/perf/util/db-export.h +++ b/tools/perf/util/db-export.h @@ -43,7 +43,8 @@ struct db_export { int (*export_machine)(struct db_export *dbe, struct machine *machine); int (*export_thread)(struct db_export *dbe, struct thread *thread, u64 main_thread_db_id, struct machine *machine); - int (*export_comm)(struct db_export *dbe, struct comm *comm); + int (*export_comm)(struct db_export *dbe, struct comm *comm, + struct thread *thread); int (*export_comm_thread)(struct db_export *dbe, u64 db_id, struct comm *comm, struct thread *thread); int (*export_dso)(struct db_export *dbe, struct dso *dso, diff --git a/tools/perf/util/scripting-engines/trace-event-python.c 
b/tools/perf/util/scripting-engines/trace-event-python.c index c9837f0f0fd6..28167e938cef 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -1011,15 +1011,19 @@ static int python_export_thread(struct db_export *dbe, struct thread *thread, return 0; } -static int python_export_comm(struct db_export *dbe, struct comm *comm) +static int python_export_comm(struct db_export *dbe, struct comm *comm, + struct thread *thread) { struct tables *tables = container_of(dbe, struct tables, dbe); PyObject *t; - t = tuple_new(2); + t = tuple_new(5); tuple_set_u64(t, 0, comm->db_id); tuple_set_string(t, 1, comm__str(comm)); + tuple_set_u64(t, 2, thread->db_id); + tuple_set_u64(t, 3, comm->start); + tuple_set_s32(t, 4, comm->exec); call_object(tables->comm_handler, t, "comm_table");
[tip:perf/urgent] perf db-export: Fix a white space issue in db_export__sample()
Commit-ID: a5defb2f3984e0f056e4113b54c461782796c7be Gitweb: https://git.kernel.org/tip/a5defb2f3984e0f056e4113b54c461782796c7be Author: Adrian Hunter AuthorDate: Wed, 10 Jul 2019 11:57:56 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Wed, 10 Jul 2019 12:12:56 -0300 perf db-export: Fix a white space issue in db_export__sample() Fix a white space issue in db_export__sample() Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190710085810.1650-8-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/db-export.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c index 78f62a733b9d..2c3a4ad68428 100644 --- a/tools/perf/util/db-export.c +++ b/tools/perf/util/db-export.c @@ -274,7 +274,7 @@ int db_export__sample(struct db_export *dbe, union perf_event *event, struct perf_sample *sample, struct perf_evsel *evsel, struct addr_location *al) { - struct thread* thread = al->thread; + struct thread *thread = al->thread; struct export_sample es = { .event = event, .sample = sample,
[tip:perf/urgent] perf db-export: Move export__comm_thread into db_export__sample()
Commit-ID: 1ed119589834e25c130fdaa911ca8b0e3fd1cddf Gitweb: https://git.kernel.org/tip/1ed119589834e25c130fdaa911ca8b0e3fd1cddf Author: Adrian Hunter AuthorDate: Wed, 10 Jul 2019 11:57:55 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Wed, 10 Jul 2019 12:12:45 -0300 perf db-export: Move export__comm_thread into db_export__sample() Move call to db_export__comm_thread() from db_export__thread() into db_export__sample() because it makes the code easier to understand, and add explanatory comments. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190710085810.1650-7-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/db-export.c | 35 +-- tools/perf/util/db-export.h | 3 +-- 2 files changed, 22 insertions(+), 16 deletions(-) diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c index 99ad759561de..78f62a733b9d 100644 --- a/tools/perf/util/db-export.c +++ b/tools/perf/util/db-export.c @@ -59,25 +59,17 @@ int db_export__machine(struct db_export *dbe, struct machine *machine) } int db_export__thread(struct db_export *dbe, struct thread *thread, - struct machine *machine, struct comm *comm, - struct thread *main_thread) + struct machine *machine, struct thread *main_thread) { u64 main_thread_db_id = 0; - int err; if (thread->db_id) return 0; thread->db_id = ++dbe->thread_last_db_id; - if (main_thread) { - if (main_thread != thread && comm) { - err = db_export__comm_thread(dbe, comm, thread); - if (err) - return err; - } + if (main_thread) main_thread_db_id = main_thread->db_id; - } if (dbe->export_thread) return dbe->export_thread(dbe, thread, main_thread_db_id, @@ -303,15 +295,19 @@ int db_export__sample(struct db_export *dbe, union perf_event *event, main_thread = thread__main_thread(al->machine, thread); if (main_thread) { - comm = machine__thread_exec_comm(al->machine, main_thread); /* * A thread has a reference to the main thread, so export the * main thread first. 
*/ - err = db_export__thread(dbe, main_thread, al->machine, comm, + err = db_export__thread(dbe, main_thread, al->machine, main_thread); if (err) goto out_put; + /* +* Export comm before exporting the non-main thread because +* db_export__comm_thread() can be called further below. +*/ + comm = machine__thread_exec_comm(al->machine, main_thread); if (comm) { err = db_export__exec_comm(dbe, comm, main_thread); if (err) @@ -321,10 +317,21 @@ int db_export__sample(struct db_export *dbe, union perf_event *event, } if (thread != main_thread) { - err = db_export__thread(dbe, thread, al->machine, comm, - main_thread); + /* +* For a non-main thread, db_export__comm_thread() must be +* called only if thread has not previously been exported. +*/ + bool export_comm_thread = comm && !thread->db_id; + + err = db_export__thread(dbe, thread, al->machine, main_thread); if (err) goto out_put; + + if (export_comm_thread) { + err = db_export__comm_thread(dbe, comm, thread); + if (err) + goto out_put; + } } es.db_id = ++dbe->sample_last_db_id; diff --git a/tools/perf/util/db-export.h b/tools/perf/util/db-export.h index 6e267321594c..811a678a910d 100644 --- a/tools/perf/util/db-export.h +++ b/tools/perf/util/db-export.h @@ -75,8 +75,7 @@ void db_export__exit(struct db_export *dbe); int db_export__evsel(struct db_export *dbe, struct perf_evsel *evsel); int db_export__machine(struct db_export *dbe, struct machine *machine); int db_export__thread(struct db_export *dbe, struct thread *thread, - struct machine *machine, struct comm *comm, - struct thread *main_thread); + struct machine *machine, struct thread *main_thread); int db_export__exec_comm(struct db_export *dbe, struct comm *comm, struct thread *main_thread); int db_export__comm_thread(struct db_export *dbe, struct comm *comm,
[tip:perf/urgent] perf db-export: Export comm before exporting thread
Commit-ID: 6319790bcf825bcc4cd9bf01f01ae404a2fb7da8 Gitweb: https://git.kernel.org/tip/6319790bcf825bcc4cd9bf01f01ae404a2fb7da8 Author: Adrian Hunter AuthorDate: Wed, 10 Jul 2019 11:57:54 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Wed, 10 Jul 2019 12:12:25 -0300 perf db-export: Export comm before exporting thread Export comm before exporting the non-main thread because db_export__thread() also exports the comm_thread. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190710085810.1650-6-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/db-export.c | 13 ++--- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c index 63f9edf65eee..99ad759561de 100644 --- a/tools/perf/util/db-export.c +++ b/tools/perf/util/db-export.c @@ -312,6 +312,12 @@ int db_export__sample(struct db_export *dbe, union perf_event *event, main_thread); if (err) goto out_put; + if (comm) { + err = db_export__exec_comm(dbe, comm, main_thread); + if (err) + goto out_put; + es.comm_db_id = comm->db_id; + } } if (thread != main_thread) { @@ -321,13 +327,6 @@ int db_export__sample(struct db_export *dbe, union perf_event *event, goto out_put; } - if (comm) { - err = db_export__exec_comm(dbe, comm, main_thread); - if (err) - goto out_put; - es.comm_db_id = comm->db_id; - } - es.db_id = ++dbe->sample_last_db_id; err = db_ids_from_al(dbe, al, &es.dso_db_id, &es.sym_db_id, &es.offset);
[tip:perf/urgent] perf db-export: Export main_thread in db_export__sample()
Commit-ID: 19207d86940db9dad5f2e0a270a2490f7da451e3 Gitweb: https://git.kernel.org/tip/19207d86940db9dad5f2e0a270a2490f7da451e3 Author: Adrian Hunter AuthorDate: Wed, 10 Jul 2019 11:57:53 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Wed, 10 Jul 2019 12:12:05 -0300 perf db-export: Export main_thread in db_export__sample() Export main_thread in db_export__sample() because it makes the code easier to understand, and prepares db_export__thread() for further simplification. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190710085810.1650-5-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/db-export.c | 30 ++ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c index 14501236c046..63f9edf65eee 100644 --- a/tools/perf/util/db-export.c +++ b/tools/perf/util/db-export.c @@ -71,16 +71,10 @@ int db_export__thread(struct db_export *dbe, struct thread *thread, thread->db_id = ++dbe->thread_last_db_id; if (main_thread) { - if (main_thread != thread) { - err = db_export__thread(dbe, main_thread, machine, - comm, main_thread); + if (main_thread != thread && comm) { + err = db_export__comm_thread(dbe, comm, thread); if (err) return err; - if (comm) { - err = db_export__comm_thread(dbe, comm, thread); - if (err) - return err; - } } main_thread_db_id = main_thread->db_id; } @@ -308,12 +302,24 @@ int db_export__sample(struct db_export *dbe, union perf_event *event, return err; main_thread = thread__main_thread(al->machine, thread); - if (main_thread) + if (main_thread) { comm = machine__thread_exec_comm(al->machine, main_thread); + /* +* A thread has a reference to the main thread, so export the +* main thread first. 
+*/ + err = db_export__thread(dbe, main_thread, al->machine, comm, + main_thread); + if (err) + goto out_put; + } - err = db_export__thread(dbe, thread, al->machine, comm, main_thread); - if (err) - goto out_put; + if (thread != main_thread) { + err = db_export__thread(dbe, thread, al->machine, comm, + main_thread); + if (err) + goto out_put; + } if (comm) { err = db_export__exec_comm(dbe, comm, main_thread);
[tip:perf/urgent] perf db-export: Pass main_thread to db_export__thread()
Commit-ID: ed5c0a16feb9f1a4347f109d5e9607f6f38688a0 Gitweb: https://git.kernel.org/tip/ed5c0a16feb9f1a4347f109d5e9607f6f38688a0 Author: Adrian Hunter AuthorDate: Wed, 10 Jul 2019 11:57:52 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Wed, 10 Jul 2019 12:11:04 -0300 perf db-export: Pass main_thread to db_export__thread() Calls to db_export__thread() already have main_thread so there is no reason to get it again, instead pass it as a parameter. Note that one difference in this approach is that the main thread is not created if it does not exist. It is better if it is not created because: - If main_thread is being traced it will have been created already. - If it is not being traced, there will be no other information about it, and it will never get deleted because there will be no EXIT event. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190710085810.1650-4-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/db-export.c | 29 - tools/perf/util/db-export.h | 3 ++- 2 files changed, 10 insertions(+), 22 deletions(-) diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c index 8fab57f90cbc..14501236c046 100644 --- a/tools/perf/util/db-export.c +++ b/tools/perf/util/db-export.c @@ -59,9 +59,9 @@ int db_export__machine(struct db_export *dbe, struct machine *machine) } int db_export__thread(struct db_export *dbe, struct thread *thread, - struct machine *machine, struct comm *comm) + struct machine *machine, struct comm *comm, + struct thread *main_thread) { - struct thread *main_thread; u64 main_thread_db_id = 0; int err; @@ -70,28 +70,19 @@ int db_export__thread(struct db_export *dbe, struct thread *thread, thread->db_id = ++dbe->thread_last_db_id; - if (thread->pid_ != -1) { - if (thread->pid_ == thread->tid) { - main_thread = thread; - } else { - main_thread = machine__findnew_thread(machine, - thread->pid_, - thread->pid_); - if (!main_thread) - return -ENOMEM; + if (main_thread) { + if 
(main_thread != thread) { err = db_export__thread(dbe, main_thread, machine, - comm); + comm, main_thread); if (err) - goto out_put; + return err; if (comm) { err = db_export__comm_thread(dbe, comm, thread); if (err) - goto out_put; + return err; } } main_thread_db_id = main_thread->db_id; - if (main_thread != thread) - thread__put(main_thread); } if (dbe->export_thread) @@ -99,10 +90,6 @@ int db_export__thread(struct db_export *dbe, struct thread *thread, machine); return 0; - -out_put: - thread__put(main_thread); - return err; } /* @@ -324,7 +311,7 @@ int db_export__sample(struct db_export *dbe, union perf_event *event, if (main_thread) comm = machine__thread_exec_comm(al->machine, main_thread); - err = db_export__thread(dbe, thread, al->machine, comm); + err = db_export__thread(dbe, thread, al->machine, comm, main_thread); if (err) goto out_put; diff --git a/tools/perf/util/db-export.h b/tools/perf/util/db-export.h index 148a657b1887..6e267321594c 100644 --- a/tools/perf/util/db-export.h +++ b/tools/perf/util/db-export.h @@ -75,7 +75,8 @@ void db_export__exit(struct db_export *dbe); int db_export__evsel(struct db_export *dbe, struct perf_evsel *evsel); int db_export__machine(struct db_export *dbe, struct machine *machine); int db_export__thread(struct db_export *dbe, struct thread *thread, - struct machine *machine, struct comm *comm); + struct machine *machine, struct comm *comm, + struct thread *main_thread); int db_export__exec_comm(struct db_export *dbe, struct comm *comm, struct thread *main_thread); int db_export__comm_thread(struct db_export *dbe, struct comm *comm,
[tip:perf/urgent] perf db-export: Rename db_export__comm() to db_export__exec_comm()
Commit-ID: 208032fef13b68cf1eefc945dafb82efc88c6b8f Gitweb: https://git.kernel.org/tip/208032fef13b68cf1eefc945dafb82efc88c6b8f Author: Adrian Hunter AuthorDate: Wed, 10 Jul 2019 11:57:51 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Wed, 10 Jul 2019 12:10:27 -0300 perf db-export: Rename db_export__comm() to db_export__exec_comm() Rename db_export__comm() to db_export__exec_comm() to better reflect what it does and add explanatory comments. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190710085810.1650-3-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/db-export.c | 22 +++--- tools/perf/util/db-export.h | 4 ++-- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c index 34cf197fe74f..8fab57f90cbc 100644 --- a/tools/perf/util/db-export.c +++ b/tools/perf/util/db-export.c @@ -105,8 +105,14 @@ out_put: return err; } -int db_export__comm(struct db_export *dbe, struct comm *comm, - struct thread *main_thread) +/* + * Export the "exec" comm. The "exec" comm is the program / application command + * name at the time it first executes. It is used to group threads for the same + * program. Note that the main thread pid (or thread group id tgid) cannot be + * used because it does not change when a new program is exec'ed. + */ +int db_export__exec_comm(struct db_export *dbe, struct comm *comm, +struct thread *main_thread) { int err; @@ -121,6 +127,16 @@ int db_export__comm(struct db_export *dbe, struct comm *comm, return err; } + /* +* Record the main thread for this comm. Note that the main thread can +* have many "exec" comms because there will be a new one every time it +* exec's. An "exec" comm however will only ever have 1 main thread. +* That is different to any other threads for that same program because +* exec() will effectively kill them, so the relationship between the +* "exec" comm and non-main threads is 1-to-1. 
That is why +* db_export__comm_thread() is called here for the main thread, but it +* is called for non-main threads when they are exported. +*/ return db_export__comm_thread(dbe, comm, main_thread); } @@ -313,7 +329,7 @@ int db_export__sample(struct db_export *dbe, union perf_event *event, goto out_put; if (comm) { - err = db_export__comm(dbe, comm, main_thread); + err = db_export__exec_comm(dbe, comm, main_thread); if (err) goto out_put; es.comm_db_id = comm->db_id; diff --git a/tools/perf/util/db-export.h b/tools/perf/util/db-export.h index 261cfece8dee..148a657b1887 100644 --- a/tools/perf/util/db-export.h +++ b/tools/perf/util/db-export.h @@ -76,8 +76,8 @@ int db_export__evsel(struct db_export *dbe, struct perf_evsel *evsel); int db_export__machine(struct db_export *dbe, struct machine *machine); int db_export__thread(struct db_export *dbe, struct thread *thread, struct machine *machine, struct comm *comm); -int db_export__comm(struct db_export *dbe, struct comm *comm, - struct thread *main_thread); +int db_export__exec_comm(struct db_export *dbe, struct comm *comm, +struct thread *main_thread); int db_export__comm_thread(struct db_export *dbe, struct comm *comm, struct thread *thread); int db_export__dso(struct db_export *dbe, struct dso *dso,
[tip:perf/urgent] perf db-export: Get rid of db_export__deferred()
Commit-ID: fead24e52383c3f8eb25b5426d52b430b84a8194 Gitweb: https://git.kernel.org/tip/fead24e52383c3f8eb25b5426d52b430b84a8194 Author: Adrian Hunter AuthorDate: Wed, 10 Jul 2019 11:57:50 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Wed, 10 Jul 2019 12:07:40 -0300 perf db-export: Get rid of db_export__deferred() db_export__deferred() deferred the export of comms if the comm string had not been "set" (changed from :<pid>) however that problem was fixed a long time ago by commit e803cf97a4f9 ("perf record: Synthesize COMM event for a command line workload"), so get rid of db_export__deferred(). Signed-off-by: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/20190710085810.1650-2-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/db-export.c| 61 +- tools/perf/util/db-export.h| 2 - .../util/scripting-engines/trace-event-python.c| 4 +- 3 files changed, 2 insertions(+), 65 deletions(-) diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c index 2394c7506abe..34cf197fe74f 100644 --- a/tools/perf/util/db-export.c +++ b/tools/perf/util/db-export.c @@ -20,70 +20,14 @@ #include "db-export.h" #include <linux/zalloc.h> -struct deferred_export { - struct list_head node; - struct comm *comm; -}; - -static int db_export__deferred(struct db_export *dbe) -{ - struct deferred_export *de; - int err; - - while (!list_empty(&dbe->deferred)) { - de = list_entry(dbe->deferred.next, struct deferred_export, - node); - err = dbe->export_comm(dbe, de->comm); - list_del_init(&de->node); - free(de); - if (err) - return err; - } - - return 0; -} - -static void db_export__free_deferred(struct db_export *dbe) -{ - struct deferred_export *de; - - while (!list_empty(&dbe->deferred)) { - de = list_entry(dbe->deferred.next, struct deferred_export, - node); - list_del_init(&de->node); - free(de); - } -} - -static int db_export__defer_comm(struct db_export *dbe, struct comm *comm) -{ - struct deferred_export *de; - - de =
zalloc(sizeof(struct deferred_export)); - if (!de) - return -ENOMEM; - - de->comm = comm; - list_add_tail(&de->node, &dbe->deferred); - - return 0; -} - int db_export__init(struct db_export *dbe) { memset(dbe, 0, sizeof(struct db_export)); - INIT_LIST_HEAD(&dbe->deferred); return 0; } -int db_export__flush(struct db_export *dbe) -{ - return db_export__deferred(dbe); -} - void db_export__exit(struct db_export *dbe) { - db_export__free_deferred(dbe); call_return_processor__free(dbe->crp); dbe->crp = NULL; } @@ -172,10 +116,7 @@ int db_export__comm(struct db_export *dbe, struct comm *comm, comm->db_id = ++dbe->comm_last_db_id; if (dbe->export_comm) { - if (main_thread->comm_set) - err = dbe->export_comm(dbe, comm); - else - err = db_export__defer_comm(dbe, comm); + err = dbe->export_comm(dbe, comm); if (err) return err; } diff --git a/tools/perf/util/db-export.h b/tools/perf/util/db-export.h index e8a64028a386..261cfece8dee 100644 --- a/tools/perf/util/db-export.h +++ b/tools/perf/util/db-export.h @@ -68,11 +68,9 @@ struct db_export { u64 sample_last_db_id; u64 call_path_last_db_id; u64 call_return_last_db_id; - struct list_head deferred; }; int db_export__init(struct db_export *dbe); -int db_export__flush(struct db_export *dbe); void db_export__exit(struct db_export *dbe); int db_export__evsel(struct db_export *dbe, struct perf_evsel *evsel); int db_export__machine(struct db_export *dbe, struct machine *machine); diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 112bed65232f..c9837f0f0fd6 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -1620,9 +1620,7 @@ error: static int python_flush_script(void) { - struct tables *tables = &tables_global; - - return db_export__flush(&tables->dbe); + return 0; } /*
[tip:perf/urgent] perf scripts python: export-to-sqlite.py: Fix DROP VIEW power_events_view
Commit-ID: 1334bb94cd8a21217cb0c186925f9bc9d47adafc Gitweb: https://git.kernel.org/tip/1334bb94cd8a21217cb0c186925f9bc9d47adafc Author: Adrian Hunter AuthorDate: Mon, 8 Jul 2019 08:52:32 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Tue, 9 Jul 2019 10:13:28 -0300 perf scripts python: export-to-sqlite.py: Fix DROP VIEW power_events_view Drop power_events_view before its dependent tables. SQLite does not seem to mind but the fix was needed for PostgreSQL (export-to-postgresql.py script), so apply the same fix to the SQLite script. It is more logical and keeps the 2 scripts following the same approach. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Fixes: 5130c6e55531 ("perf scripts python: export-to-sqlite.py: Export Intel PT power and ptwrite events") Link: http://lkml.kernel.org/r/20190708055232.5032-3-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/export-to-sqlite.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/scripts/python/export-to-sqlite.py b/tools/perf/scripts/python/export-to-sqlite.py index 3222a83f4184..021326c46285 100644 --- a/tools/perf/scripts/python/export-to-sqlite.py +++ b/tools/perf/scripts/python/export-to-sqlite.py @@ -608,11 +608,11 @@ def trace_end(): if is_table_empty("ptwrite"): drop("ptwrite") if is_table_empty("mwait") and is_table_empty("pwre") and is_table_empty("exstop") and is_table_empty("pwrx"): + do_query(query, 'DROP VIEW power_events_view'); drop("mwait") drop("pwre") drop("exstop") drop("pwrx") - do_query(query, 'DROP VIEW power_events_view'); if is_table_empty("cbr"): drop("cbr")
[tip:perf/urgent] perf scripts python: export-to-postgresql.py: Fix DROP VIEW power_events_view
Commit-ID: d8d051df9f906232715282cc0570c94273b197bc Gitweb: https://git.kernel.org/tip/d8d051df9f906232715282cc0570c94273b197bc Author: Adrian Hunter AuthorDate: Mon, 8 Jul 2019 08:52:31 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Tue, 9 Jul 2019 10:13:27 -0300 perf scripts python: export-to-postgresql.py: Fix DROP VIEW power_events_view PostgreSQL can error if power_events_view is not dropped before its dependent tables e.g. Exception: Query failed: ERROR: cannot drop table mwait because other objects depend on it DETAIL: view power_events_view depends on table mwait Signed-off-by: Adrian Hunter Cc: Jiri Olsa Fixes: aba44287a224 ("perf scripts python: export-to-postgresql.py: Export Intel PT power and ptwrite events") Link: http://lkml.kernel.org/r/20190708055232.5032-2-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/export-to-postgresql.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/scripts/python/export-to-postgresql.py b/tools/perf/scripts/python/export-to-postgresql.py index 4447f0d7c754..92713d93e956 100644 --- a/tools/perf/scripts/python/export-to-postgresql.py +++ b/tools/perf/scripts/python/export-to-postgresql.py @@ -898,11 +898,11 @@ def trace_end(): if is_table_empty("ptwrite"): drop("ptwrite") if is_table_empty("mwait") and is_table_empty("pwre") and is_table_empty("exstop") and is_table_empty("pwrx"): + do_query(query, 'DROP VIEW power_events_view'); drop("mwait") drop("pwre") drop("exstop") drop("pwrx") - do_query(query, 'DROP VIEW power_events_view'); if is_table_empty("cbr"): drop("cbr")
[tip:perf/core] perf scripts python: export-to-postgresql.py: Export Intel PT power and ptwrite events
Commit-ID: aba44287a224dfcfdd99ba885ca9d9acc4de0c17 Gitweb: https://git.kernel.org/tip/aba44287a224dfcfdd99ba885ca9d9acc4de0c17 Author: Adrian Hunter AuthorDate: Sat, 22 Jun 2019 12:32:48 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Tue, 25 Jun 2019 08:47:10 -0300 perf scripts python: export-to-postgresql.py: Export Intel PT power and ptwrite events The format of synthesized events is determined by the attribute config. For the formats for Intel PT power and ptwrite events, create tables and populate them when the synth_data handler is called. If the tables remain empty, drop them at the end. The tables and views, including a combined power_events_view, will display automatically from the tables menu of the exported-sql-viewer.py script. Note, currently only Atoms since Gemini Lake have support for ptwrite and mwait, pwre, exstop and pwrx, but all Intel PT implementations support cbr. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190622093248.581-8-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/export-to-postgresql.py | 251 ++ 1 file changed, 251 insertions(+) diff --git a/tools/perf/scripts/python/export-to-postgresql.py b/tools/perf/scripts/python/export-to-postgresql.py index 93225c02117e..4447f0d7c754 100644 --- a/tools/perf/scripts/python/export-to-postgresql.py +++ b/tools/perf/scripts/python/export-to-postgresql.py @@ -447,6 +447,38 @@ if perf_db_export_calls: 'insn_count bigint,' 'cyc_count bigint)') +do_query(query, 'CREATE TABLE ptwrite (' + 'id bigint NOT NULL,' + 'payloadbigint,' + 'exact_ip boolean)') + +do_query(query, 'CREATE TABLE cbr (' + 'id bigint NOT NULL,' + 'cbrinteger,' + 'mhzinteger,' + 'percentinteger)') + +do_query(query, 'CREATE TABLE mwait (' + 'id bigint NOT NULL,' + 'hints integer,' + 'extensions integer)') + +do_query(query, 'CREATE TABLE pwre (' + 'id bigint NOT NULL,' + 'cstate integer,' + 'subcstate integer,' + 'hw boolean)') +
+do_query(query, 'CREATE TABLE exstop (' + 'id bigint NOT NULL,' + 'exact_ip boolean)') + +do_query(query, 'CREATE TABLE pwrx (' + 'id bigint NOT NULL,' + 'deepest_cstate integer,' + 'last_cstateinteger,' + 'wake_reasoninteger)') + do_query(query, 'CREATE VIEW machines_view AS ' 'SELECT ' 'id,' @@ -561,6 +593,104 @@ do_query(query, 'CREATE VIEW samples_view AS ' 'CASE WHEN cyc_count=0 THEN CAST(0 AS NUMERIC(20, 2)) ELSE CAST((CAST(insn_count AS FLOAT) / cyc_count) AS NUMERIC(20, 2)) END AS IPC' ' FROM samples') +do_query(query, 'CREATE VIEW ptwrite_view AS ' + 'SELECT ' + 'ptwrite.id,' + 'time,' + 'cpu,' + 'to_hex(payload) AS payload_hex,' + 'CASE WHEN exact_ip=FALSE THEN \'False\' ELSE \'True\' END AS exact_ip' + ' FROM ptwrite' + ' INNER JOIN samples ON samples.id = ptwrite.id') + +do_query(query, 'CREATE VIEW cbr_view AS ' + 'SELECT ' + 'cbr.id,' + 'time,' + 'cpu,' + 'cbr,' + 'mhz,' + 'percent' + ' FROM cbr' + ' INNER JOIN samples ON samples.id = cbr.id') + +do_query(query, 'CREATE VIEW mwait_view AS ' + 'SELECT ' + 'mwait.id,' + 'time,' + 'cpu,' + 'to_hex(hints) AS hints_hex,' + 'to_hex(extensions) AS extensions_hex' + ' FROM mwait' + ' INNER JOIN samples ON samples.id = mwait.id') + +do_query(query, 'CREATE VIEW pwre_view AS ' + 'SELECT ' + 'pwre.id,' + 'time,' + 'cpu,' + 'cstate,' + 'subcstate,' + 'CASE WHEN hw=FALSE THEN \'False\' ELSE \'True\' END AS hw' + ' FROM pwre' + ' INNER JOIN samples ON samples.id = pwre.id') + +do_query(query, 'CREATE VIEW exstop_view AS ' + 'SELECT ' + 'exstop.id,' + 'time,' + 'cpu,' + 'CASE WHEN exact_ip=FALSE THEN \'False\' ELSE \'True\' END AS exact_ip' + ' FROM exstop' + ' INNER JOIN samples ON samples.id = exstop.id') + +do_query(query, 'CREATE VIEW pwrx_view AS ' + 'SELECT ' + 'pwrx.id,' + 'time,' + 'cpu,' + 'deepest_cstate,' + 'last_cstate,' + 'CASE WHEN wake_reason=1 THEN \'Interrupt\'' + ' WHEN wake_reason=2 THEN \'Timer Deadline\'' + ' WHEN wake_reason=4
[tip:perf/core] perf scripts python: export-to-sqlite.py: Export Intel PT power and ptwrite events
Commit-ID: 5130c6e55531b9bbcdeb8b327711ff204bc4835f Gitweb: https://git.kernel.org/tip/5130c6e55531b9bbcdeb8b327711ff204bc4835f Author: Adrian Hunter AuthorDate: Sat, 22 Jun 2019 12:32:47 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Tue, 25 Jun 2019 08:47:10 -0300 perf scripts python: export-to-sqlite.py: Export Intel PT power and ptwrite events The format of synthesized events is determined by the attribute config. For the formats for Intel PT power and ptwrite events, create tables and populate them when the synth_data handler is called. If the tables remain empty, drop them at the end. The tables and views, including a combined power_events_view, will display automatically from the tables menu of the exported-sql-viewer.py script. Note, currently only Atoms since Gemini Lake have support for ptwrite and mwait, pwre, exstop and pwrx, but all Intel PT implementations support cbr. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190622093248.581-7-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/export-to-sqlite.py | 239 ++ 1 file changed, 239 insertions(+) diff --git a/tools/perf/scripts/python/export-to-sqlite.py b/tools/perf/scripts/python/export-to-sqlite.py index 4542ce89034b..3222a83f4184 100644 --- a/tools/perf/scripts/python/export-to-sqlite.py +++ b/tools/perf/scripts/python/export-to-sqlite.py @@ -271,6 +271,38 @@ if perf_db_export_calls: 'insn_count bigint,' 'cyc_count bigint)') +do_query(query, 'CREATE TABLE ptwrite (' + 'id integer NOT NULLPRIMARY KEY,' + 'payloadbigint,' + 'exact_ip integer)') + +do_query(query, 'CREATE TABLE cbr (' + 'id integer NOT NULLPRIMARY KEY,' + 'cbrinteger,' + 'mhzinteger,' + 'percentinteger)') + +do_query(query, 'CREATE TABLE mwait (' + 'id integer NOT NULLPRIMARY KEY,' + 'hints integer,' + 'extensions integer)') + +do_query(query, 'CREATE TABLE pwre (' + 'id integer NOT NULLPRIMARY KEY,' + 'cstate integer,' + 'subcstate integer,' + 'hw
integer)') + +do_query(query, 'CREATE TABLE exstop (' + 'id integer NOT NULLPRIMARY KEY,' + 'exact_ip integer)') + +do_query(query, 'CREATE TABLE pwrx (' + 'id integer NOT NULLPRIMARY KEY,' + 'deepest_cstate integer,' + 'last_cstateinteger,' + 'wake_reasoninteger)') + # printf was added to sqlite in version 3.8.3 sqlite_has_printf = False try: @@ -399,6 +431,102 @@ do_query(query, 'CREATE VIEW samples_view AS ' 'CASE WHEN cyc_count=0 THEN CAST(0 AS FLOAT) ELSE ROUND(CAST(insn_count AS FLOAT) / cyc_count, 2) END AS IPC' ' FROM samples') +do_query(query, 'CREATE VIEW ptwrite_view AS ' + 'SELECT ' + 'ptwrite.id,' + 'time,' + 'cpu,' + + emit_to_hex('payload') + ' AS payload_hex,' + 'CASE WHEN exact_ip=0 THEN \'False\' ELSE \'True\' END AS exact_ip' + ' FROM ptwrite' + ' INNER JOIN samples ON samples.id = ptwrite.id') + +do_query(query, 'CREATE VIEW cbr_view AS ' + 'SELECT ' + 'cbr.id,' + 'time,' + 'cpu,' + 'cbr,' + 'mhz,' + 'percent' + ' FROM cbr' + ' INNER JOIN samples ON samples.id = cbr.id') + +do_query(query, 'CREATE VIEW mwait_view AS ' + 'SELECT ' + 'mwait.id,' + 'time,' + 'cpu,' + + emit_to_hex('hints') + ' AS hints_hex,' + + emit_to_hex('extensions') + ' AS extensions_hex' + ' FROM mwait' + ' INNER JOIN samples ON samples.id = mwait.id') + +do_query(query, 'CREATE VIEW pwre_view AS ' + 'SELECT ' + 'pwre.id,' + 'time,' + 'cpu,' + 'cstate,' + 'subcstate,' + 'CASE WHEN hw=0 THEN \'False\' ELSE \'True\' END AS hw' + ' FROM pwre' + ' INNER JOIN samples ON samples.id = pwre.id') + +do_query(query, 'CREATE VIEW exstop_view AS ' + 'SELECT ' + 'exstop.id,' + 'time,' + 'cpu,' + 'CASE WHEN exact_ip=0 THEN \'False\' ELSE \'True\' END AS exact_ip' + ' FROM exstop' + ' INNER JOIN samples ON samples.id = exstop.id') + +do_query(query, 'CREATE VIEW pwrx_view AS ' + 'SELECT ' + 'pwrx.id,' + 'time,' + 'cpu,' +
[tip:perf/core] perf db-export: Export synth events
Commit-ID: b9322cab17a1092e2aa7ee2505ecceb0cd5fd685 Gitweb: https://git.kernel.org/tip/b9322cab17a1092e2aa7ee2505ecceb0cd5fd685 Author: Adrian Hunter AuthorDate: Sat, 22 Jun 2019 12:32:46 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Tue, 25 Jun 2019 08:47:10 -0300 perf db-export: Export synth events Synthesized events are samples but with architecture-specific data stored in sample->raw_data. They are identified by attribute type PERF_TYPE_SYNTH. Add a function to export them. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190622093248.581-6-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../util/scripting-engines/trace-event-python.c| 46 +- 1 file changed, 44 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 6acb379b53ec..112bed65232f 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -112,6 +112,7 @@ struct tables { PyObject*sample_handler; PyObject*call_path_handler; PyObject*call_return_handler; + PyObject*synth_handler; booldb_export_mode; }; @@ -947,6 +948,12 @@ static int tuple_set_string(PyObject *t, unsigned int pos, const char *s) return PyTuple_SetItem(t, pos, _PyUnicode_FromString(s)); } +static int tuple_set_bytes(PyObject *t, unsigned int pos, void *bytes, + unsigned int sz) +{ + return PyTuple_SetItem(t, pos, _PyBytes_FromStringAndSize(bytes, sz)); +} + static int python_export_evsel(struct db_export *dbe, struct perf_evsel *evsel) { struct tables *tables = container_of(dbe, struct tables, dbe); @@ -1105,8 +1112,8 @@ static int python_export_branch_type(struct db_export *dbe, u32 branch_type, return 0; } -static int python_export_sample(struct db_export *dbe, - struct export_sample *es) +static void python_export_sample_table(struct db_export *dbe, + struct export_sample *es) { struct tables *tables = 
container_of(dbe, struct tables, dbe); PyObject *t; @@ -1141,6 +1148,33 @@ static int python_export_sample(struct db_export *dbe, call_object(tables->sample_handler, t, "sample_table"); Py_DECREF(t); +} + +static void python_export_synth(struct db_export *dbe, struct export_sample *es) +{ + struct tables *tables = container_of(dbe, struct tables, dbe); + PyObject *t; + + t = tuple_new(3); + + tuple_set_u64(t, 0, es->db_id); + tuple_set_u64(t, 1, es->evsel->attr.config); + tuple_set_bytes(t, 2, es->sample->raw_data, es->sample->raw_size); + + call_object(tables->synth_handler, t, "synth_data"); + + Py_DECREF(t); +} + +static int python_export_sample(struct db_export *dbe, + struct export_sample *es) +{ + struct tables *tables = container_of(dbe, struct tables, dbe); + + python_export_sample_table(dbe, es); + + if (es->evsel->attr.type == PERF_TYPE_SYNTH && tables->synth_handler) + python_export_synth(dbe, es); return 0; } @@ -1477,6 +1511,14 @@ static void set_table_handlers(struct tables *tables) SET_TABLE_HANDLER(sample); SET_TABLE_HANDLER(call_path); SET_TABLE_HANDLER(call_return); + + /* +* Synthesized events are samples but with architecture-specific data +* stored in sample->raw_data. They are exported via +* python_export_sample() and consequently do not need a separate export +* callback. +*/ + tables->synth_handler = get_handler("synth_data"); } #if PY_MAJOR_VERSION < 3
[tip:perf/core] perf intel-pt: Synthesize CBR events when last seen value changes
Commit-ID: 5fe2cf7d19c48f2b53b57e6a5786972bc1b8d738 Gitweb: https://git.kernel.org/tip/5fe2cf7d19c48f2b53b57e6a5786972bc1b8d738 Author: Adrian Hunter AuthorDate: Sat, 22 Jun 2019 12:32:45 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Tue, 25 Jun 2019 08:47:10 -0300 perf intel-pt: Synthesize CBR events when last seen value changes The first core-to-bus ratio (CBR) event will not be shown if --itrace 's' option (skip initial number of events) is used, nor if time intervals are specified that do not include the start of tracing. Change the logic to record the last CBR value seen by the user, and synthesize CBR events whenever that changes. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190622093248.581-5-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/intel-pt.c | 65 +- 1 file changed, 41 insertions(+), 24 deletions(-) diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 550db6e77968..470aaae9d930 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -171,6 +171,7 @@ struct intel_pt_queue { u64 last_in_cyc_cnt; u64 last_br_insn_cnt; u64 last_br_cyc_cnt; + unsigned int cbr_seen; char insn[INTEL_PT_INSN_BUF_SZ]; }; @@ -1052,6 +1053,8 @@ static int intel_pt_setup_queue(struct intel_pt *pt, ptq->cpu = queue->cpu; ptq->tid = queue->tid; + ptq->cbr_seen = UINT_MAX; + if (pt->sampling_mode && !pt->snapshot_mode && pt->timeless_decoding) ptq->step_through_buffers = true; @@ -1184,6 +1187,17 @@ static inline bool intel_pt_skip_event(struct intel_pt *pt) pt->num_events++ < pt->synth_opts.initial_skip; } +/* + * Cannot count CBR as skipped because it won't go away until cbr == cbr_seen. + * Also ensure CBR is first non-skipped event by allowing for 4 more samples + * from this decoder state. 
+ */ +static inline bool intel_pt_skip_cbr_event(struct intel_pt *pt) +{ + return pt->synth_opts.initial_skip && + pt->num_events + 4 < pt->synth_opts.initial_skip; +} + static void intel_pt_prep_a_sample(struct intel_pt_queue *ptq, union perf_event *event, struct perf_sample *sample) @@ -1429,9 +1443,11 @@ static int intel_pt_synth_cbr_sample(struct intel_pt_queue *ptq) struct perf_synth_intel_cbr raw; u32 flags; - if (intel_pt_skip_event(pt)) + if (intel_pt_skip_cbr_event(pt)) return 0; + ptq->cbr_seen = ptq->state->cbr; + intel_pt_prep_p_sample(pt, ptq, event, &sample); sample.id = ptq->pt->cbr_id; @@ -1868,8 +1884,7 @@ static inline bool intel_pt_is_switch_ip(struct intel_pt_queue *ptq, u64 ip) } #define INTEL_PT_PWR_EVT (INTEL_PT_MWAIT_OP | INTEL_PT_PWR_ENTRY | \ - INTEL_PT_EX_STOP | INTEL_PT_PWR_EXIT | \ - INTEL_PT_CBR_CHG) + INTEL_PT_EX_STOP | INTEL_PT_PWR_EXIT) static int intel_pt_sample(struct intel_pt_queue *ptq) { @@ -1901,31 +1916,33 @@ static int intel_pt_sample(struct intel_pt_queue *ptq) return err; } - if (pt->sample_pwr_events && (state->type & INTEL_PT_PWR_EVT)) { - if (state->type & INTEL_PT_CBR_CHG) { + if (pt->sample_pwr_events) { + if (ptq->state->cbr != ptq->cbr_seen) { err = intel_pt_synth_cbr_sample(ptq); if (err) return err; } - if (state->type & INTEL_PT_MWAIT_OP) { - err = intel_pt_synth_mwait_sample(ptq); - if (err) - return err; - } - if (state->type & INTEL_PT_PWR_ENTRY) { - err = intel_pt_synth_pwre_sample(ptq); - if (err) - return err; - } - if (state->type & INTEL_PT_EX_STOP) { - err = intel_pt_synth_exstop_sample(ptq); - if (err) - return err; - } - if (state->type & INTEL_PT_PWR_EXIT) { - err = intel_pt_synth_pwrx_sample(ptq); - if (err) - return err; + if (state->type & INTEL_PT_PWR_EVT) { + if (state->type & INTEL_PT_MWAIT_OP) { + err = intel_pt_synth_mwait_sample(ptq); + if (err) + return err; + } + if (state->type & INTEL_PT_PWR_ENTRY) { + err =
[tip:perf/core] perf intel-pt: Add CBR value to decoder state
Commit-ID: 51b091861828f5801207a00211ea4e94102389c3 Gitweb: https://git.kernel.org/tip/51b091861828f5801207a00211ea4e94102389c3 Author: Adrian Hunter AuthorDate: Sat, 22 Jun 2019 12:32:44 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Tue, 25 Jun 2019 08:47:10 -0300 perf intel-pt: Add CBR value to decoder state For convenience, add the core-to-bus ratio (CBR) value to the decoder state. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190622093248.581-4-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/intel-pt-decoder/intel-pt-decoder.c | 1 + tools/perf/util/intel-pt-decoder/intel-pt-decoder.h | 1 + 2 files changed, 2 insertions(+) diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 5eb792cc5d3a..4d14e78c5927 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -2633,6 +2633,7 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder) } decoder->state.type |= INTEL_PT_CBR_CHG; decoder->state.cbr_payload = decoder->cbr_payload; + decoder->state.cbr = decoder->cbr; } if (intel_pt_sample_time(decoder->pkt_state)) { intel_pt_update_sample_time(decoder); diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h index 9957f2ccdca8..e289e463d635 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h @@ -213,6 +213,7 @@ struct intel_pt_state { uint64_t pwre_payload; uint64_t pwrx_payload; uint64_t cbr_payload; + uint32_t cbr; uint32_t flags; enum intel_pt_insn_op insn_op; int insn_len;
[tip:perf/core] perf intel-pt: Cater for CBR change in PSB+
Commit-ID: 91de8684f1cff6944634bfb9098dc3a2583f798c Gitweb: https://git.kernel.org/tip/91de8684f1cff6944634bfb9098dc3a2583f798c Author: Adrian Hunter AuthorDate: Sat, 22 Jun 2019 12:32:43 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Tue, 25 Jun 2019 08:47:10 -0300 perf intel-pt: Cater for CBR change in PSB+ PSB+ provides status information only so the core-to-bus ratio (CBR) in PSB+ will not have changed from its previous value. However, cater for the possibility of another CBR change that gets caught up in the PSB+ anyway. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190622093248.581-3-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/intel-pt-decoder/intel-pt-decoder.c | 7 +++ 1 file changed, 7 insertions(+) diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 3d2255f284f4..5eb792cc5d3a 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -1975,6 +1975,13 @@ next: goto next; if (err) return err; + /* +* PSB+ CBR will not have changed but cater for the +* possibility of another CBR change that gets caught up +* in the PSB+. +*/ + if (decoder->cbr != decoder->cbr_seen) + return 0; break; case INTEL_PT_PIP:
[tip:perf/core] perf intel-pt: Decoder to output CBR changes immediately
Commit-ID: abe5a1d3e4bee361bd3b21b8909c8421e46911d1 Gitweb: https://git.kernel.org/tip/abe5a1d3e4bee361bd3b21b8909c8421e46911d1 Author: Adrian Hunter AuthorDate: Sat, 22 Jun 2019 12:32:42 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Tue, 25 Jun 2019 08:47:10 -0300 perf intel-pt: Decoder to output CBR changes immediately The core-to-bus ratio (CBR) provides the CPU frequency. With branches enabled, the decoder was outputting CBR changes only when there was a branch. That loses the correct time of the change if the trace is not in context (e.g. not tracing kernel space). Change to output the CBR change immediately. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190622093248.581-2-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/intel-pt-decoder/intel-pt-decoder.c | 16 ++-- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index f8b71bf2bb4c..3d2255f284f4 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -2015,16 +2015,8 @@ next: case INTEL_PT_CBR: intel_pt_calc_cbr(decoder); - if (!decoder->branch_enable && - decoder->cbr != decoder->cbr_seen) { - decoder->cbr_seen = decoder->cbr; - decoder->state.type = INTEL_PT_CBR_CHG; - decoder->state.from_ip = decoder->ip; - decoder->state.to_ip = 0; - decoder->state.cbr_payload = - decoder->packet.payload; + if (decoder->cbr != decoder->cbr_seen) return 0; - } break; case INTEL_PT_MODE_EXEC: @@ -2626,8 +2618,12 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder) decoder->sample_tot_cyc_cnt = decoder->tot_cyc_cnt; } else { decoder->state.err = 0; - if (decoder->cbr != decoder->cbr_seen && decoder->state.type) { + if (decoder->cbr != decoder->cbr_seen) { decoder->cbr_seen = decoder->cbr; + if (!decoder->state.type) { + 
decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + } decoder->state.type |= INTEL_PT_CBR_CHG; decoder->state.cbr_payload = decoder->cbr_payload; }
[tip:perf/core] perf thread-stack: Eliminate code duplicating thread_stack__pop_ks()
Commit-ID: eb5d854456f5a4ccec6f9681b7196cf056df8cfa Gitweb: https://git.kernel.org/tip/eb5d854456f5a4ccec6f9681b7196cf056df8cfa Author: Adrian Hunter AuthorDate: Wed, 19 Jun 2019 09:44:29 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Tue, 25 Jun 2019 08:47:10 -0300 perf thread-stack: Eliminate code duplicating thread_stack__pop_ks() Use new function thread_stack__pop_ks() in place of equivalent code. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190619064429.14940-3-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/thread-stack.c | 18 ++ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c index 4c826a2e08d8..6ff1ff4d4ce7 100644 --- a/tools/perf/util/thread-stack.c +++ b/tools/perf/util/thread-stack.c @@ -664,12 +664,9 @@ static int thread_stack__no_call_return(struct thread *thread, if (ip >= ks && addr < ks) { /* Return to userspace, so pop all kernel addresses */ - while (thread_stack__in_kernel(ts)) { - err = thread_stack__call_return(thread, ts, --ts->cnt, - tm, ref, true); - if (err) - return err; - } + err = thread_stack__pop_ks(thread, ts, sample, ref); + if (err) + return err; /* If the stack is empty, push the userspace address */ if (!ts->cnt) { @@ -679,12 +676,9 @@ static int thread_stack__no_call_return(struct thread *thread, } } else if (thread_stack__in_kernel(ts) && ip < ks) { /* Return to userspace, so pop all kernel addresses */ - while (thread_stack__in_kernel(ts)) { - err = thread_stack__call_return(thread, ts, --ts->cnt, - tm, ref, true); - if (err) - return err; - } + err = thread_stack__pop_ks(thread, ts, sample, ref); + if (err) + return err; } if (ts->cnt)
[tip:perf/core] perf thread-stack: Fix thread stack return from kernel for kernel-only case
Commit-ID: 97860b483c5597663a174ff7405be957b4838391 Gitweb: https://git.kernel.org/tip/97860b483c5597663a174ff7405be957b4838391 Author: Adrian Hunter AuthorDate: Wed, 19 Jun 2019 09:44:28 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Tue, 25 Jun 2019 08:47:10 -0300 perf thread-stack: Fix thread stack return from kernel for kernel-only case Commit f08046cb3082 ("perf thread-stack: Represent jmps to the start of a different symbol") had the side-effect of introducing more stack entries before return from kernel space. When user space is also traced, those entries are popped before entry to user space, but when user space is not traced, they get stuck at the bottom of the stack, making the stack grow progressively larger. Fix by detecting a return-from-kernel branch type, and popping kernel addresses from the stack then. Note, the problem and fix affect the exported Call Graph / Tree but not the callindent option used by "perf script --call-trace". Example: perf-with-kcore record example -e intel_pt//k -- ls perf-with-kcore script example --itrace=bep -s ~/libexec/perf-core/scripts/python/export-to-sqlite.py example.db branches calls ~/libexec/perf-core/scripts/python/exported-sql-viewer.py example.db Menu option: Reports -> Context-Sensitive Call Graph Before: (showing Call Path column only) Call Path ▶ perf ▼ ls ▼ 12111:12111 ▶ setup_new_exec ▶ __task_pid_nr_ns ▶ perf_event_pid_type ▶ perf_event_comm_output ▶ perf_iterate_ctx ▶ perf_iterate_sb ▶ perf_event_comm ▶ __set_task_comm ▶ load_elf_binary ▶ search_binary_handler ▶ __do_execve_file.isra.41 ▶ __x64_sys_execve ▶ do_syscall_64 ▼ entry_SYSCALL_64_after_hwframe ▼ swapgs_restore_regs_and_return_to_usermode ▼ native_iret ▶ error_entry ▶ do_page_fault ▼ error_exit ▼ retint_user ▶ prepare_exit_to_usermode ▼ native_iret ▶ error_entry ▶ do_page_fault ▼ error_exit ▼ retint_user ▶ prepare_exit_to_usermode ▼ native_iret ▶ error_entry ▶ do_page_fault 
▼ error_exit ▼ retint_user ▶ prepare_exit_to_usermode ▶ native_iret After: (showing Call Path column only) Call Path ▶ perf ▼ ls ▼ 12111:12111 ▶ setup_new_exec ▶ __task_pid_nr_ns ▶ perf_event_pid_type ▶ perf_event_comm_output ▶ perf_iterate_ctx ▶ perf_iterate_sb ▶ perf_event_comm ▶ __set_task_comm ▶ load_elf_binary ▶ search_binary_handler ▶ __do_execve_file.isra.41 ▶ __x64_sys_execve ▶ do_syscall_64 ▶ entry_SYSCALL_64_after_hwframe ▶ page_fault ▼ entry_SYSCALL_64 ▼ do_syscall_64 ▶ __x64_sys_brk ▶ __x64_sys_access ▶ __x64_sys_openat ▶ __x64_sys_newfstat ▶ __x64_sys_mmap ▶ __x64_sys_close ▶ __x64_sys_read ▶ __x64_sys_mprotect ▶ __x64_sys_arch_prctl ▶ __x64_sys_munmap ▶ exit_to_usermode_loop ▶ __x64_sys_set_tid_address ▶ __x64_sys_set_robust_list ▶ __x64_sys_rt_sigaction ▶ __x64_sys_rt_sigprocmask ▶ __x64_sys_prlimit64 ▶ __x64_sys_statfs ▶ __x64_sys_ioctl ▶ __x64_sys_getdents64 ▶ __x64_sys_write ▶ __x64_sys_exit_group Committer notes: The first arg to the perf-with-kcore needs to be the same for the 'record' and 'script' lines, otherwise we'll record the perf.data file and kcore_dir/ files in one directory ('example') to then try to use it from the 'bep' directory, fix the instructions above it so that both use 'example'. 
Signed-off-by: Adrian Hunter Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: sta...@vger.kernel.org Fixes: f08046cb3082 ("perf thread-stack: Represent jmps to the start of a different symbol") Link: http://lkml.kernel.org/r/20190619064429.14940-2-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/thread-stack.c | 30 +- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c index c485186a8b6d..4c826a2e08d8 100644 --- a/tools/perf/util/thread-stack.c +++ b/tools/perf/util/thread-stack.c @@ -628,6 +628,23 @@ static int thread_stack__bottom(struct thread_stack *ts, true, false); } +static int thread_stack__pop_ks(struct thread *thread, struct threa
[tip:perf/core] perf intel-pt: Add callchain to synthesized PEBS sample
Commit-ID: e01f0ef509ea7e76929f24a074d241de52c6f82a Gitweb: https://git.kernel.org/tip/e01f0ef509ea7e76929f24a074d241de52c6f82a Author: Adrian Hunter AuthorDate: Mon, 10 Jun 2019 10:28:03 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Mon, 17 Jun 2019 15:57:18 -0300 perf intel-pt: Add callchain to synthesized PEBS sample Like other synthesized events, if there is also an Intel PT branch trace, then a call stack can also be synthesized. Add that. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190610072803.10456-12-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/intel-pt.c | 8 1 file changed, 8 insertions(+) diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index bf7647897e8a..550db6e77968 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -1730,6 +1730,14 @@ static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) sample.time = tsc_to_perf_time(timestamp, &pt->tc); } + if (sample_type & PERF_SAMPLE_CALLCHAIN && + pt->synth_opts.callchain) { + thread_stack__sample(ptq->thread, ptq->cpu, ptq->chain, +pt->synth_opts.callchain_sz, sample.ip, +pt->kernel_start); + sample.callchain = ptq->chain; + } + if (sample_type & PERF_SAMPLE_REGS_INTR && items->mask[INTEL_PT_GP_REGS_POS]) { u64 regs[sizeof(sample.intr_regs.mask)];
[tip:perf/core] perf intel-pt: Add memory information to synthesized PEBS sample
Commit-ID: 975846eddf907297aa036544545cd839c7c7dd31 Gitweb: https://git.kernel.org/tip/975846eddf907297aa036544545cd839c7c7dd31 Author: Adrian Hunter AuthorDate: Mon, 10 Jun 2019 10:28:02 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Mon, 17 Jun 2019 15:57:18 -0300 perf intel-pt: Add memory information to synthesized PEBS sample Add memory information from PEBS data in the Intel PT trace to the synthesized PEBS sample. This provides sample types PERF_SAMPLE_ADDR, PERF_SAMPLE_WEIGHT, and PERF_SAMPLE_TRANSACTION, but not PERF_SAMPLE_DATA_SRC. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190610072803.10456-11-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/intel-pt.c | 27 +++ 1 file changed, 27 insertions(+) diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index db00c13dc36f..bf7647897e8a 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -1766,6 +1766,33 @@ static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) } } + if (sample_type & PERF_SAMPLE_ADDR && items->has_mem_access_address) + sample.addr = items->mem_access_address; + + if (sample_type & PERF_SAMPLE_WEIGHT) { + /* +* Refer kernel's setup_pebs_adaptive_sample_data() and +* intel_hsw_weight(). +*/ + if (items->has_mem_access_latency) + sample.weight = items->mem_access_latency; + if (!sample.weight && items->has_tsx_aux_info) { + /* Cycles last block */ + sample.weight = (u32)items->tsx_aux_info; + } + } + + if (sample_type & PERF_SAMPLE_TRANSACTION && items->has_tsx_aux_info) { + u64 ax = items->has_rax ? items->rax : 0; + /* Refer kernel's intel_hsw_transaction() */ + u64 txn = (u8)(items->tsx_aux_info >> 32); + + /* For RTM XABORTs also log the abort code from AX */ + if (txn & PERF_TXN_TRANSACTION && ax & 1) + txn |= ((ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT; + sample.transaction = txn; + } + return intel_pt_deliver_synth_event(pt, ptq, event, &sample, sample_type); }
[tip:perf/core] perf intel-pt: Add XMM registers to synthesized PEBS sample
Commit-ID: 143d34a6b387b96aba42c49cb76d18ad3e3863e5 Gitweb: https://git.kernel.org/tip/143d34a6b387b96aba42c49cb76d18ad3e3863e5 Author: Adrian Hunter AuthorDate: Mon, 10 Jun 2019 10:28:00 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Mon, 17 Jun 2019 15:57:18 -0300 perf intel-pt: Add XMM registers to synthesized PEBS sample Add XMM register information from PEBS data in the Intel PT trace to the synthesized PEBS sample. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190610072803.10456-9-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/intel-pt.c | 30 +- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 00c2c96bb805..f83dd10bb7d0 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -1603,6 +1603,31 @@ static u64 *intel_pt_add_gp_regs(struct regs_dump *intr_regs, u64 *pos, return pos; } +#ifndef PERF_REG_X86_XMM0 +#define PERF_REG_X86_XMM0 32 +#endif + +static void intel_pt_add_xmm(struct regs_dump *intr_regs, u64 *pos, +const struct intel_pt_blk_items *items, +u64 regs_mask) +{ + u32 mask = items->has_xmm & (regs_mask >> PERF_REG_X86_XMM0); + const u64 *xmm = items->xmm; + + /* +* If there are any XMM registers, then there should be all of them. +* Nevertheless, follow the logic to add only registers that were +* requested (i.e. 'regs_mask') and that were provided (i.e. 'mask'), +* and update the resulting mask (i.e. 'intr_regs->mask') accordingly. 
+*/ + intr_regs->mask |= (u64)mask << PERF_REG_X86_XMM0; + + for (; mask; mask >>= 1, xmm++) { + if (mask & 1) + *pos++ = *xmm; + } +} + static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) { const struct intel_pt_blk_items *items = &ptq->state->items; @@ -1657,13 +1682,16 @@ static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) items->mask[INTEL_PT_GP_REGS_POS]) { u64 regs[sizeof(sample.intr_regs.mask)]; u64 regs_mask = evsel->attr.sample_regs_intr; + u64 *pos; sample.intr_regs.abi = items->is_32_bit ? PERF_SAMPLE_REGS_ABI_32 : PERF_SAMPLE_REGS_ABI_64; sample.intr_regs.regs = regs; - intel_pt_add_gp_regs(&sample.intr_regs, regs, items, regs_mask); + pos = intel_pt_add_gp_regs(&sample.intr_regs, regs, items, regs_mask); + + intel_pt_add_xmm(&sample.intr_regs, pos, items, regs_mask); } return intel_pt_deliver_synth_event(pt, ptq, event, &sample, sample_type);
[tip:perf/core] perf intel-pt: Add LBR information to synthesized PEBS sample
Commit-ID: aa62afd7daac4b4cc95cd2454e3f43aa23f519c1 Gitweb: https://git.kernel.org/tip/aa62afd7daac4b4cc95cd2454e3f43aa23f519c1 Author: Adrian Hunter AuthorDate: Mon, 10 Jun 2019 10:28:01 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Mon, 17 Jun 2019 15:57:18 -0300 perf intel-pt: Add LBR information to synthesized PEBS sample Add LBR information from PEBS data in the Intel PT trace to the synthesized PEBS sample. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190610072803.10456-10-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/intel-pt.c | 72 ++ 1 file changed, 72 insertions(+) diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index f83dd10bb7d0..db00c13dc36f 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -1628,6 +1628,58 @@ static void intel_pt_add_xmm(struct regs_dump *intr_regs, u64 *pos, } } +#define LBR_INFO_MISPRED (1ULL << 63) +#define LBR_INFO_IN_TX (1ULL << 62) +#define LBR_INFO_ABORT (1ULL << 61) +#define LBR_INFO_CYCLES0x + +/* Refer kernel's intel_pmu_store_pebs_lbrs() */ +static u64 intel_pt_lbr_flags(u64 info) +{ + union { + struct branch_flags flags; + u64 result; + } u = { + .flags = { + .mispred= !!(info & LBR_INFO_MISPRED), + .predicted = !(info & LBR_INFO_MISPRED), + .in_tx = !!(info & LBR_INFO_IN_TX), + .abort = !!(info & LBR_INFO_ABORT), + .cycles = info & LBR_INFO_CYCLES, + } + }; + + return u.result; +} + +static void intel_pt_add_lbrs(struct branch_stack *br_stack, + const struct intel_pt_blk_items *items) +{ + u64 *to; + int i; + + br_stack->nr = 0; + + to = &br_stack->entries[0].from; + + for (i = INTEL_PT_LBR_0_POS; i <= INTEL_PT_LBR_2_POS; i++) { + u32 mask = items->mask[i]; + const u64 *from = items->val[i]; + + for (; mask; mask >>= 3, from += 3) { + if ((mask & 7) == 7) { + *to++ = from[0]; + *to++ = from[1]; + *to++ = intel_pt_lbr_flags(from[2]); + br_stack->nr += 1; + } + } + } +} + +/* INTEL_PT_LBR_0, 
INTEL_PT_LBR_1 and INTEL_PT_LBR_2 */ +#define LBRS_MAX (INTEL_PT_BLK_ITEM_ID_CNT * 3) + static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) { const struct intel_pt_blk_items *items = &ptq->state->items; @@ -1694,6 +1746,26 @@ static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) intel_pt_add_xmm(&sample.intr_regs, pos, items, regs_mask); } + if (sample_type & PERF_SAMPLE_BRANCH_STACK) { + struct { + struct branch_stack br_stack; + struct branch_entry entries[LBRS_MAX]; + } br; + + if (items->mask[INTEL_PT_LBR_0_POS] || + items->mask[INTEL_PT_LBR_1_POS] || + items->mask[INTEL_PT_LBR_2_POS]) { + intel_pt_add_lbrs(&br.br_stack, items); + sample.branch_stack = &br.br_stack; + } else if (pt->synth_opts.last_branch) { + intel_pt_copy_last_branch_rb(ptq); + sample.branch_stack = ptq->last_branch; + } else { + br.br_stack.nr = 0; + sample.branch_stack = &br.br_stack; + } + } + return intel_pt_deliver_synth_event(pt, ptq, event, &sample, sample_type); }
[tip:perf/core] perf intel-pt: Add gp registers to synthesized PEBS sample
Commit-ID: 9e9a618afc178e747cc449464ba54d9c932f7af2 Gitweb: https://git.kernel.org/tip/9e9a618afc178e747cc449464ba54d9c932f7af2 Author: Adrian Hunter AuthorDate: Mon, 10 Jun 2019 10:27:59 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Mon, 17 Jun 2019 15:57:18 -0300 perf intel-pt: Add gp registers to synthesized PEBS sample Add general purpose register information from PEBS data in the Intel PT trace to the synthesized PEBS sample. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190610072803.10456-8-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/intel-pt.c | 69 ++ 1 file changed, 69 insertions(+) diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 979519b00a74..00c2c96bb805 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -35,6 +35,8 @@ #include "config.h" #include "time-utils.h" +#include "../arch/x86/include/uapi/asm/perf_regs.h" + #include "intel-pt-decoder/intel-pt-log.h" #include "intel-pt-decoder/intel-pt-decoder.h" #include "intel-pt-decoder/intel-pt-insn-decoder.h" @@ -1547,6 +1549,60 @@ static int intel_pt_synth_pwrx_sample(struct intel_pt_queue *ptq) pt->pwr_events_sample_type); } +/* + * PEBS gp_regs array indexes plus 1 so that 0 means not present. Refer + * intel_pt_add_gp_regs(). 
+ */ +static const int pebs_gp_regs[] = { + [PERF_REG_X86_FLAGS]= 1, + [PERF_REG_X86_IP] = 2, + [PERF_REG_X86_AX] = 3, + [PERF_REG_X86_CX] = 4, + [PERF_REG_X86_DX] = 5, + [PERF_REG_X86_BX] = 6, + [PERF_REG_X86_SP] = 7, + [PERF_REG_X86_BP] = 8, + [PERF_REG_X86_SI] = 9, + [PERF_REG_X86_DI] = 10, + [PERF_REG_X86_R8] = 11, + [PERF_REG_X86_R9] = 12, + [PERF_REG_X86_R10] = 13, + [PERF_REG_X86_R11] = 14, + [PERF_REG_X86_R12] = 15, + [PERF_REG_X86_R13] = 16, + [PERF_REG_X86_R14] = 17, + [PERF_REG_X86_R15] = 18, +}; + +static u64 *intel_pt_add_gp_regs(struct regs_dump *intr_regs, u64 *pos, +const struct intel_pt_blk_items *items, +u64 regs_mask) +{ + const u64 *gp_regs = items->val[INTEL_PT_GP_REGS_POS]; + u32 mask = items->mask[INTEL_PT_GP_REGS_POS]; + u32 bit; + int i; + + for (i = 0, bit = 1; i < PERF_REG_X86_64_MAX; i++, bit <<= 1) { + /* Get the PEBS gp_regs array index */ + int n = pebs_gp_regs[i] - 1; + + if (n < 0) + continue; + /* +* Add only registers that were requested (i.e. 'regs_mask') and +* that were provided (i.e. 'mask'), and update the resulting +* mask (i.e. 'intr_regs->mask') accordingly. +*/ + if (mask & 1 << n && regs_mask & bit) { + intr_regs->mask |= bit; + *pos++ = gp_regs[n]; + } + } + + return pos; +} + static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) { const struct intel_pt_blk_items *items = &ptq->state->items; @@ -1597,6 +1653,19 @@ static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) sample.time = tsc_to_perf_time(timestamp, &pt->tc); } + if (sample_type & PERF_SAMPLE_REGS_INTR && + items->mask[INTEL_PT_GP_REGS_POS]) { + u64 regs[sizeof(sample.intr_regs.mask)]; + u64 regs_mask = evsel->attr.sample_regs_intr; + + sample.intr_regs.abi = items->is_32_bit ? + PERF_SAMPLE_REGS_ABI_32 : + PERF_SAMPLE_REGS_ABI_64; + sample.intr_regs.regs = regs; + + intel_pt_add_gp_regs(&sample.intr_regs, regs, items, regs_mask); + } + return intel_pt_deliver_synth_event(pt, ptq, event, &sample, sample_type); }
[tip:perf/core] perf intel-pt: Synthesize PEBS sample basic information
Commit-ID: 9d0bc53e35b82e429ab698d112f7af4336578735 Gitweb: https://git.kernel.org/tip/9d0bc53e35b82e429ab698d112f7af4336578735 Author: Adrian Hunter AuthorDate: Mon, 10 Jun 2019 10:27:58 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Mon, 17 Jun 2019 15:57:18 -0300 perf intel-pt: Synthesize PEBS sample basic information Synthesize a PEBS sample using basic information (ip, timestamp) only. Other PEBS information will be added in later patches. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190610072803.10456-7-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/intel-pt.c | 52 -- 1 file changed, 50 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index a2d90b2f1f11..979519b00a74 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -1547,9 +1547,57 @@ static int intel_pt_synth_pwrx_sample(struct intel_pt_queue *ptq) pt->pwr_events_sample_type); } -static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq __maybe_unused) +static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) { - return 0; + const struct intel_pt_blk_items *items = &ptq->state->items; + struct perf_sample sample = { .ip = 0, }; + union perf_event *event = ptq->event_buf; + struct intel_pt *pt = ptq->pt; + struct perf_evsel *evsel = pt->pebs_evsel; + u64 sample_type = evsel->attr.sample_type; + u64 id = evsel->id[0]; + u8 cpumode; + + if (intel_pt_skip_event(pt)) + return 0; + + intel_pt_prep_a_sample(ptq, event, &sample); + + sample.id = id; + sample.stream_id = id; + + if (!evsel->attr.freq) + sample.period = evsel->attr.sample_period; + + /* No support for non-zero CS base */ + if (items->has_ip) + sample.ip = items->ip; + else if (items->has_rip) + sample.ip = items->rip; + else + sample.ip = ptq->state->from_ip; + + /* No support for guest mode at this time */ + cpumode = sample.ip < ptq->pt->kernel_start ? 
+ PERF_RECORD_MISC_USER : + PERF_RECORD_MISC_KERNEL; + + event->sample.header.misc = cpumode | PERF_RECORD_MISC_EXACT_IP; + + sample.cpumode = cpumode; + + if (sample_type & PERF_SAMPLE_TIME) { + u64 timestamp = 0; + + if (items->has_timestamp) + timestamp = items->timestamp; + else if (!pt->timeless_decoding) + timestamp = ptq->timestamp; + if (timestamp) + sample.time = tsc_to_perf_time(timestamp, &pt->tc); + } + + return intel_pt_deliver_synth_event(pt, ptq, event, &sample, sample_type); } static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu,
[tip:perf/core] perf intel-pt: Prepare to synthesize PEBS samples
Commit-ID: e62ca655eea7ad4956929f647c2d9fb36aeff90e Gitweb: https://git.kernel.org/tip/e62ca655eea7ad4956929f647c2d9fb36aeff90e Author: Adrian Hunter AuthorDate: Mon, 10 Jun 2019 10:27:56 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Mon, 17 Jun 2019 15:57:17 -0300 perf intel-pt: Prepare to synthesize PEBS samples Add infrastructure to prepare for synthesizing PEBS samples but leave the actual synthesis to later patches. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190610072803.10456-5-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/intel-pt.c | 18 ++ 1 file changed, 18 insertions(+) diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 893cef494a43..cc91c1413c22 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -101,6 +101,9 @@ struct intel_pt { u64 pwrx_id; u64 cbr_id; + bool sample_pebs; + struct perf_evsel *pebs_evsel; + u64 tsc_bit; u64 mtc_bit; u64 mtc_freq_bits; @@ -1535,6 +1538,11 @@ static int intel_pt_synth_pwrx_sample(struct intel_pt_queue *ptq) pt->pwr_events_sample_type); } +static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq __maybe_unused) +{ + return 0; +} + static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu, pid_t pid, pid_t tid, u64 ip, u64 timestamp) { @@ -1622,6 +1630,16 @@ static int intel_pt_sample(struct intel_pt_queue *ptq) ptq->ipc_cyc_cnt = ptq->state->tot_cyc_cnt; } + /* +* Do PEBS first to allow for the possibility that the PEBS timestamp +* precedes the current timestamp. +*/ + if (pt->sample_pebs && state->type & INTEL_PT_BLK_ITEMS) { + err = intel_pt_synth_pebs_sample(ptq); + if (err) + return err; + } + if (pt->sample_pwr_events && (state->type & INTEL_PT_PWR_EVT)) { if (state->type & INTEL_PT_CBR_CHG) { err = intel_pt_synth_cbr_sample(ptq);
[tip:perf/core] perf intel-pt: Factor out common sample preparation for re-use
Commit-ID: 0dfded34a2e3b517c149ee9c7d1e5173025017b7 Gitweb: https://git.kernel.org/tip/0dfded34a2e3b517c149ee9c7d1e5173025017b7 Author: Adrian Hunter AuthorDate: Mon, 10 Jun 2019 10:27:57 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Mon, 17 Jun 2019 15:57:18 -0300 perf intel-pt: Factor out common sample preparation for re-use Factor out common sample preparation for re-use when synthesizing PEBS samples. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190610072803.10456-6-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/intel-pt.c | 23 --- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index cc91c1413c22..a2d90b2f1f11 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -1182,28 +1182,37 @@ static inline bool intel_pt_skip_event(struct intel_pt *pt) pt->num_events++ < pt->synth_opts.initial_skip; } +static void intel_pt_prep_a_sample(struct intel_pt_queue *ptq, + union perf_event *event, + struct perf_sample *sample) +{ + event->sample.header.type = PERF_RECORD_SAMPLE; + event->sample.header.size = sizeof(struct perf_event_header); + + sample->pid = ptq->pid; + sample->tid = ptq->tid; + sample->cpu = ptq->cpu; + sample->insn_len = ptq->insn_len; + memcpy(sample->insn, ptq->insn, INTEL_PT_INSN_BUF_SZ); +} + static void intel_pt_prep_b_sample(struct intel_pt *pt, struct intel_pt_queue *ptq, union perf_event *event, struct perf_sample *sample) { + intel_pt_prep_a_sample(ptq, event, sample); + if (!pt->timeless_decoding) sample->time = tsc_to_perf_time(ptq->timestamp, &pt->tc); sample->ip = ptq->state->from_ip; sample->cpumode = intel_pt_cpumode(pt, sample->ip); - sample->pid = ptq->pid; - sample->tid = ptq->tid; sample->addr = ptq->state->to_ip; sample->period = 1; - sample->cpu = ptq->cpu; sample->flags = ptq->flags; - sample->insn_len = ptq->insn_len; - memcpy(sample->insn, ptq->insn, 
INTEL_PT_INSN_BUF_SZ); - event->sample.header.type = PERF_RECORD_SAMPLE; event->sample.header.misc = sample->cpumode; - event->sample.header.size = sizeof(struct perf_event_header); } static int intel_pt_inject_event(union perf_event *event,
[tip:perf/core] perf intel-pt: Add decoder support for PEBS via PT
Commit-ID: 4c35595e1ea7585d09eb80096f47af237061e795 Gitweb: https://git.kernel.org/tip/4c35595e1ea7585d09eb80096f47af237061e795 Author: Adrian Hunter AuthorDate: Mon, 10 Jun 2019 10:27:55 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Mon, 17 Jun 2019 15:57:17 -0300 perf intel-pt: Add decoder support for PEBS via PT PEBS data is encoded in Block Item Packets (BIP). Populate a new structure intel_pt_blk_items with the values and, upon a Block End Packet (BEP), report them as a new Intel PT sample type INTEL_PT_BLK_ITEMS. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190610072803.10456-4-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../perf/util/intel-pt-decoder/intel-pt-decoder.c | 78 +++- .../perf/util/intel-pt-decoder/intel-pt-decoder.h | 137 + 2 files changed, 214 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 2f7791d4034f..f8b71bf2bb4c 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -134,6 +134,9 @@ struct intel_pt_decoder { struct intel_pt_stack stack; enum intel_pt_pkt_state pkt_state; enum intel_pt_pkt_ctx pkt_ctx; + enum intel_pt_pkt_ctx prev_pkt_ctx; + enum intel_pt_blk_type blk_type; + int blk_type_pos; struct intel_pt_pkt packet; struct intel_pt_pkt tnt; int pkt_step; @@ -167,6 +170,7 @@ struct intel_pt_decoder { bool set_fup_mwait; bool set_fup_pwre; bool set_fup_exstop; + bool set_fup_bep; bool sample_cyc; unsigned int fup_tx_flags; unsigned int tx_flags; @@ -560,6 +564,7 @@ static int intel_pt_get_split_packet(struct intel_pt_decoder *decoder) memcpy(buf + len, decoder->buf, n); len += n; + decoder->prev_pkt_ctx = decoder->pkt_ctx; ret = intel_pt_get_packet(buf, len, &decoder->packet, &decoder->pkt_ctx); if (ret < (int)old_len) { decoder->next_buf = decoder->buf; @@ -885,6 +890,7 @@ static int 
intel_pt_get_next_packet(struct intel_pt_decoder *decoder) return ret; } + decoder->prev_pkt_ctx = decoder->pkt_ctx; ret = intel_pt_get_packet(decoder->buf, decoder->len, &decoder->packet, &decoder->pkt_ctx); if (ret == INTEL_PT_NEED_MORE_BYTES && BITS_PER_LONG == 32 && @@ -1124,6 +1130,14 @@ static bool intel_pt_fup_event(struct intel_pt_decoder *decoder) decoder->state.to_ip = 0; ret = true; } + if (decoder->set_fup_bep) { + decoder->set_fup_bep = false; + decoder->state.type |= INTEL_PT_BLK_ITEMS; + decoder->state.type &= ~INTEL_PT_BRANCH; + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + ret = true; + } return ret; } @@ -1609,6 +1623,46 @@ static void intel_pt_calc_cyc_timestamp(struct intel_pt_decoder *decoder) intel_pt_log_to("Setting timestamp", decoder->timestamp); } +static void intel_pt_bbp(struct intel_pt_decoder *decoder) +{ + if (decoder->prev_pkt_ctx == INTEL_PT_NO_CTX) { + memset(decoder->state.items.mask, 0, sizeof(decoder->state.items.mask)); + decoder->state.items.is_32_bit = false; + } + decoder->blk_type = decoder->packet.payload; + decoder->blk_type_pos = intel_pt_blk_type_pos(decoder->blk_type); + if (decoder->blk_type == INTEL_PT_GP_REGS) + decoder->state.items.is_32_bit = decoder->packet.count; + if (decoder->blk_type_pos < 0) { + intel_pt_log("WARNING: Unknown block type %u\n", +decoder->blk_type); + } else if (decoder->state.items.mask[decoder->blk_type_pos]) { + intel_pt_log("WARNING: Duplicate block type %u\n", +decoder->blk_type); + } +} + +static void intel_pt_bip(struct intel_pt_decoder *decoder) +{ + uint32_t id = decoder->packet.count; + uint32_t bit = 1 << id; + int pos = decoder->blk_type_pos; + + if (pos < 0 || id >= INTEL_PT_BLK_ITEM_ID_CNT) { + intel_pt_log("WARNING: Unknown block item %u type %d\n", +id, decoder->blk_type); + return; + } + + if (decoder->state.items.mask[pos] & bit) { + intel_pt_log("WARNING: Duplicate block item %u type %d\n", +id, decoder->blk_type); + } + + decoder->state.items.mask[pos] 
|= bit; + decoder->state.items.val[pos][id] = decoder->packet.payload; +} + /* Walk PSB+ packets when already in sync. */ static int intel_pt_walk_psbend(stru
[tip:perf/core] perf intel-pt: Add Intel PT packet decoder test
Commit-ID: a0db77bf880b8badd2f9ce4da708c69b0b865853 Gitweb: https://git.kernel.org/tip/a0db77bf880b8badd2f9ce4da708c69b0b865853 Author: Adrian Hunter AuthorDate: Mon, 10 Jun 2019 10:27:54 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Mon, 17 Jun 2019 15:57:17 -0300 perf intel-pt: Add Intel PT packet decoder test Add Intel PT packet decoder test. This test feeds byte sequences to the Intel PT packet decoder and checks the results. Changes to the packet context are also checked. Committer testing: # perf test "Intel PT" 65: Intel PT packet decoder : Ok # perf test -v "Intel PT" 65: Intel PT packet decoder : --- start --- test child forked, pid 6360 Decoded ok: 00PAD Decoded ok: 04TNT N (1) Decoded ok: 06TNT T (1) Decoded ok: 80TNT NN (6) Decoded ok: feTNT TT (6) Decoded ok: 02 a3 02 00 00 00 00 00 TNT N (1) Decoded ok: 02 a3 03 00 00 00 00 00 TNT T (1) Decoded ok: 02 a3 00 00 00 00 00 80 TNT NNN (47) Decoded ok: 02 a3 ff ff ff ff ff ff TNT TTT (47) Decoded ok: 0dTIP no ip Decoded ok: 2d 01 02 TIP 0x201 Decoded ok: 4d 01 02 03 04TIP 0x4030201 Decoded ok: 6d 01 02 03 04 05 06 TIP 0x60504030201 Decoded ok: 8d 01 02 03 04 05 06 TIP 0x60504030201 Decoded ok: cd 01 02 03 04 05 06 07 08TIP 0x807060504030201 Decoded ok: 11TIP.PGE no ip Decoded ok: 31 01 02 TIP.PGE 0x201 Decoded ok: 51 01 02 03 04TIP.PGE 0x4030201 Decoded ok: 71 01 02 03 04 05 06 TIP.PGE 0x60504030201 Decoded ok: 91 01 02 03 04 05 06 TIP.PGE 0x60504030201 Decoded ok: d1 01 02 03 04 05 06 07 08TIP.PGE 0x807060504030201 Decoded ok: 01TIP.PGD no ip Decoded ok: 21 01 02 TIP.PGD 0x201 Decoded ok: 41 01 02 03 04TIP.PGD 0x4030201 Decoded ok: 61 01 02 03 04 05 06 TIP.PGD 0x60504030201 Decoded ok: 81 01 02 03 04 05 06 TIP.PGD 0x60504030201 Decoded ok: c1 01 02 03 04 05 06 07 08TIP.PGD 0x807060504030201 Decoded ok: 1dFUP no ip Decoded ok: 3d 01 02 FUP 0x201 Decoded ok: 5d 01 02 03 04FUP 0x4030201 Decoded ok: 7d 01 02 03 04 05 06 FUP 0x60504030201 Decoded ok: 9d 01 02 03 04 05 06 FUP 0x60504030201 Decoded ok: dd 
01 02 03 04 05 06 07 08FUP 0x807060504030201 Decoded ok: 02 43 02 04 06 08 0a 0c PIP 0x60504030201 (NR=0) Decoded ok: 02 43 03 04 06 08 0a 0c PIP 0x60504030201 (NR=1) Decoded ok: 99 00 MODE.Exec 16 Decoded ok: 99 01 MODE.Exec 64 Decoded ok: 99 02 MODE.Exec 32 Decoded ok: 99 20 MODE.TSX TXAbort:0 InTX:0 Decoded ok: 99 21 MODE.TSX TXAbort:0 InTX:1 Decoded ok: 99 22 MODE.TSX TXAbort:1 InTX:0 Decoded ok: 02 83 TraceSTOP Decoded ok: 02 03 12 00 CBR 0x12 Decoded ok: 19 01 02 03 04 05 06 07 TSC 0x7060504030201 Decoded ok: 59 12 MTC 0x12 Decoded ok: 02 73 00 00 00 00 00 TMA CTC 0x0 FC 0x0 Decoded ok: 02 73 01 02 00 00 00 TMA CTC 0x201 FC 0x0 Decoded ok: 02 73 00 00 00 ff 01 TMA CTC 0x0 FC 0x1ff Decoded ok: 02 73 80 c0 00 ff 01 TMA CTC 0xc080 FC 0x1ff Decoded ok: 03CYC 0x0 Decoded ok: 0bCYC 0x1 Decoded ok: fb
[tip:perf/core] perf intel-pt: Add new packets for PEBS via PT
Commit-ID: edff7809c80f09398783d602c33a507309c23e24 Gitweb: https://git.kernel.org/tip/edff7809c80f09398783d602c33a507309c23e24 Author: Adrian Hunter AuthorDate: Mon, 10 Jun 2019 10:27:53 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Mon, 17 Jun 2019 15:57:17 -0300 perf intel-pt: Add new packets for PEBS via PT Add 3 new packets to support PEBS via PT, namely Block Begin Packet (BBP), Block Item Packet (BIP) and Block End Packet (BEP). PEBS data is encoded into multiple BIP packets that come between BBP and BEP. The BEP packet might be associated with a FUP packet. That is indicated by using a separate packet type (INTEL_PT_BEP_IP) similar to other packet types with the _IP suffix. Refer to the Intel SDM for more information about PEBS via PT: https://software.intel.com/en-us/articles/intel-sdm May 2019 version: Vol. 3B 18.5.5.2 PEBS output to Intel® Processor Trace Decoding of BIP packets conflicts with single-byte TNT packets. Since BIP packets only occur in the context of a block (i.e. between BBP and BEP), that context must be recorded and passed to the packet decoder. 
Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190610072803.10456-2-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../perf/util/intel-pt-decoder/intel-pt-decoder.c | 38 +- .../util/intel-pt-decoder/intel-pt-pkt-decoder.c | 140 - .../util/intel-pt-decoder/intel-pt-pkt-decoder.h | 21 +++- tools/perf/util/intel-pt.c | 3 +- 4 files changed, 193 insertions(+), 9 deletions(-) diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index f001f4ec4ddf..2f7791d4034f 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -133,6 +133,7 @@ struct intel_pt_decoder { int mtc_shift; struct intel_pt_stack stack; enum intel_pt_pkt_state pkt_state; + enum intel_pt_pkt_ctx pkt_ctx; struct intel_pt_pkt packet; struct intel_pt_pkt tnt; int pkt_step; @@ -559,7 +560,7 @@ static int intel_pt_get_split_packet(struct intel_pt_decoder *decoder) memcpy(buf + len, decoder->buf, n); len += n; - ret = intel_pt_get_packet(buf, len, &decoder->packet); + ret = intel_pt_get_packet(buf, len, &decoder->packet, &decoder->pkt_ctx); if (ret < (int)old_len) { decoder->next_buf = decoder->buf; decoder->next_len = decoder->len; @@ -594,6 +595,7 @@ static int intel_pt_pkt_lookahead(struct intel_pt_decoder *decoder, { struct intel_pt_pkt_info pkt_info; const unsigned char *buf = decoder->buf; + enum intel_pt_pkt_ctx pkt_ctx = decoder->pkt_ctx; size_t len = decoder->len; int ret; @@ -612,7 +614,8 @@ static int intel_pt_pkt_lookahead(struct intel_pt_decoder *decoder, if (!len) return INTEL_PT_NEED_MORE_BYTES; - ret = intel_pt_get_packet(buf, len, &pkt_info.packet); + ret = intel_pt_get_packet(buf, len, &pkt_info.packet, + &pkt_ctx); if (!ret) return INTEL_PT_NEED_MORE_BYTES; if (ret < 0) @@ -687,6 +690,10 @@ static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info) case INTEL_PT_MNT: case INTEL_PT_PTWRITE: case 
INTEL_PT_PTWRITE_IP: + case INTEL_PT_BBP: + case INTEL_PT_BIP: + case INTEL_PT_BEP: + case INTEL_PT_BEP_IP: return 0; case INTEL_PT_MTC: @@ -879,7 +886,7 @@ static int intel_pt_get_next_packet(struct intel_pt_decoder *decoder) } ret = intel_pt_get_packet(decoder->buf, decoder->len, - &decoder->packet); + &decoder->packet, &decoder->pkt_ctx); if (ret == INTEL_PT_NEED_MORE_BYTES && BITS_PER_LONG == 32 && decoder->len < INTEL_PT_PKT_MAX_SZ && !decoder->next_buf) { ret = intel_pt_get_split_packet(decoder); @@ -1633,6 +1640,10 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder) case INTEL_PT_MWAIT: case INTEL_PT_PWRE: case INTEL_PT_PWRX: + case INTEL_PT_BBP: + case INTEL_PT_BIP: + case INTEL_PT_BEP: + case INTEL_PT_BEP_IP: decoder->have_tma = false; intel_pt_log("ERROR: Unexpected packet\n"); err = -EAGAIN; @@ -1726,6 +1737,10 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder) case INTEL_PT_MWAIT: case INTEL_PT_PWRE
[tip:perf/core] perf time-utils: Add support for multiple explicit time intervals
Commit-ID: a77a05e2337df1347f4de96bfa313db7008fe8bd Gitweb: https://git.kernel.org/tip/a77a05e2337df1347f4de96bfa313db7008fe8bd Author: Adrian Hunter AuthorDate: Tue, 4 Jun 2019 16:00:17 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Mon, 10 Jun 2019 16:20:13 -0300 perf time-utils: Add support for multiple explicit time intervals Currently only a single explicit time range is accepted. Add support for multiple ranges separated by spaces, which requires the string to be quoted. Update the time utils test accordingly. Signed-off-by: Adrian Hunter Cc: Jin Yao Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190604130017.31207-20-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-diff.txt | 8 ++-- tools/perf/Documentation/perf-report.txt | 3 +- tools/perf/Documentation/perf-script.txt | 3 +- tools/perf/tests/time-utils-test.c | 17 tools/perf/util/time-utils.c | 74 +--- 5 files changed, 94 insertions(+), 11 deletions(-) diff --git a/tools/perf/Documentation/perf-diff.txt b/tools/perf/Documentation/perf-diff.txt index 5732f69580ab..facd91e4e945 100644 --- a/tools/perf/Documentation/perf-diff.txt +++ b/tools/perf/Documentation/perf-diff.txt @@ -145,9 +145,11 @@ OPTIONS ,. Times have the format seconds.nanoseconds. If 'start' is not given (i.e. time string is ',x.y') then analysis starts at the beginning of the file. If stop time is not given (i.e. time - string is 'x.y,') then analysis goes to the end of the file. Time string is - 'a1.b1,c1.d1:a2.b2,c2.d2'. Use ':' to separate timestamps for different - perf.data files. + string is 'x.y,') then analysis goes to the end of the file. + Multiple ranges can be separated by spaces, which requires the argument + to be quoted e.g. --time "1234.567,1234.789 1235," + Time string is'a1.b1,c1.d1:a2.b2,c2.d2'. Use ':' to separate timestamps + for different perf.data files. For example, we get the timestamp information from 'perf script'. 
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 3de029f6881d..8c4372819e11 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -415,7 +415,8 @@ OPTIONS have the format seconds.nanoseconds. If start is not given (i.e. time string is ',x.y') then analysis starts at the beginning of the file. If stop time is not given (i.e. time string is 'x.y,') then analysis goes - to end of file. + to end of file. Multiple ranges can be separated by spaces, which + requires the argument to be quoted e.g. --time "1234.567,1234.789 1235," Also support time percent with multiple time ranges. Time string is 'a%/n,b%/m,...' or 'a%-b%,c%-%d,...'. diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 878349cce968..d4e2e18a5881 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -364,7 +364,8 @@ include::itrace.txt[] have the format seconds.nanoseconds. If start is not given (i.e. time string is ',x.y') then analysis starts at the beginning of the file. If stop time is not given (i.e. time string is 'x.y,') then analysis goes - to end of file. + to end of file. Multiple ranges can be separated by spaces, which + requires the argument to be quoted e.g. --time "1234.567,1234.789 1235," Also support time percent with multiple time ranges. Time string is 'a%/n,b%/m,...' or 'a%-b%,c%-%d,...'. 
diff --git a/tools/perf/tests/time-utils-test.c b/tools/perf/tests/time-utils-test.c index 7504046b111c..4f53006233a1 100644 --- a/tools/perf/tests/time-utils-test.c +++ b/tools/perf/tests/time-utils-test.c @@ -168,6 +168,23 @@ int test__time_utils(struct test *t __maybe_unused, int subtest __maybe_unused) pass &= test__perf_time__parse_for_ranges(&d); } + { + u64 b = 1234567123456789ULL; + u64 c = 7654321987654321ULL; + u64 e = 8000ULL; + struct test_data d = { + .str = "1234567.123456789,1234567.123456790 " +"7654321.987654321,7654321.98765 " +"800,800.5", + .ptime = { {b, b + 1}, {c, c + 123}, {e, e + 5}, }, + .num = 3, + .skip = { b - 1, b + 2, c - 1, c + 124, e - 1, e + 6 }, + .noskip = { b, b + 1, c, c + 123, e, e + 5 }, + }; + + pass &= test__perf_time__parse_for_ranges(&d); + } + { u64 b = 7654321ULL * NSEC_PER_SEC; struct test_data d = { diff --git a/tools/perf/util
[tip:perf/core] perf tests: Add a test for time-utils
Commit-ID: e39a12cbd2496edb4cab0f99efb0d217c55ba273 Gitweb: https://git.kernel.org/tip/e39a12cbd2496edb4cab0f99efb0d217c55ba273 Author: Adrian Hunter AuthorDate: Tue, 4 Jun 2019 16:00:16 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Mon, 10 Jun 2019 16:20:12 -0300 perf tests: Add a test for time-utils Test time ranges work as expected. Committer testing: $ perf test "time utils" 59: time utils: Ok $ perf test -v "time utils" 59: time utils: --- start --- test child forked, pid 31711 parse_nsec_time("0") 0 parse_nsec_time("1") 10 parse_nsec_time("0.1") 1 parse_nsec_time("1.1") 11 parse_nsec_time("123456.123456") 123456123456000 parse_nsec_time("1234567.123456789") 1234567123456789 parse_nsec_time("18446744073.709551615") 18446744073709551615 perf_time__parse_str("1234567.123456789,1234567.123456789") start time 1234567123456789, end time 1234567123456789 perf_time__parse_str("1234567.123456789,1234567.123456790") start time 1234567123456789, end time 1234567123456790 perf_time__parse_str("1234567.123456789,") start time 1234567123456789, end time 0 perf_time__parse_str(",1234567.123456789") start time 0, end time 1234567123456789 perf_time__parse_str("0,1234567.123456789") start time 0, end time 1234567123456789 perf_time__parse_for_ranges("1234567.123456789,1234567.123456790") start time 1234567123456789, end time 1234567123456790 perf_time__parse_for_ranges("10%/1") first_sample_time 76543210 last_sample_time 765432100100 start time 0: 76543210, end time 0: 76543219 perf_time__parse_for_ranges("10%/2") first_sample_time 76543210 last_sample_time 765432100100 start time 0: 765432100010, end time 0: 765432100019 perf_time__parse_for_ranges("10%/1,10%/2") first_sample_time 112233440 last_sample_time 1122334400100 start time 0: 112233440, end time 0: 112233449 start time 1: 1122334400010, end time 1: 1122334400019 perf_time__parse_for_ranges("10%/1,10%/3,10%/10") first_sample_time 112233440 last_sample_time 1122334400100 start time 0: 112233440, end time 0: 
112233449 start time 1: 1122334400020, end time 1: 1122334400029 start time 2: 1122334400090, end time 2: 1122334400100 test child finished with 0 end time utils: Ok $ Signed-off-by: Adrian Hunter Tested-by: Arnaldo Carvalho de Melo Cc: Jin Yao Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190604130017.31207-19-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/Build | 1 + tools/perf/tests/builtin-test.c| 4 + tools/perf/tests/tests.h | 1 + tools/perf/tests/time-utils-test.c | 234 + 4 files changed, 240 insertions(+) diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build index 4afb6319ed51..e3ba63cef01e 100644 --- a/tools/perf/tests/Build +++ b/tools/perf/tests/Build @@ -51,6 +51,7 @@ perf-y += clang.o perf-y += unit_number__scnprintf.o perf-y += mem2node.o perf-y += map_groups.o +perf-y += time-utils-test.o $(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build $(call rule_mkdir) diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 941c5456d625..cd72ff0f7658 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -289,6 +289,10 @@ static struct test generic_tests[] = { .desc = "mem2node", .func = test__mem2node, }, + { + .desc = "time utils", + .func = test__time_utils, + }, { .desc = "map_groups__merge_in", .func = test__map_groups__merge_in, diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index e5e3a57cd373..72912eb473cb 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -108,6 +108,7 @@ int test__clang_subtest_get_nr(void); int test__unit_number__scnprint(struct test *test, int subtest); int test__mem2node(struct test *t, int subtest); int test__map_groups__merge_in(struct test *t, int subtest); +int test__time_utils(struct test *t, int subtest); bool test__bp_signal_is_supported(void); bool test__wp_is_supported(void); diff --git a/tools/perf/tests/time-utils-test.c b/tools/perf/tests/time-utils-test.c new 
file mode 100644 index ..7504046b111c --- /dev/null +++ b/tools/perf/tests/time-utils-test.c @@ -0,0 +1,234 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include "time-utils.h" +#include "evlist.h" +#include "session.h" +#include "debug.h" +#include "tests.h" + +static bool test__parse_nsec_time(const char *str, u64 expected) +{ +
[tip:perf/core] perf time-utils: Simplify perf_time__parse_for_ranges() error paths slightly
Commit-ID: 2a8afddc084a5f5f933382758dd2767ed8a69f77 Gitweb: https://git.kernel.org/tip/2a8afddc084a5f5f933382758dd2767ed8a69f77 Author: Adrian Hunter AuthorDate: Tue, 4 Jun 2019 16:00:14 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Mon, 10 Jun 2019 16:20:12 -0300 perf time-utils: Simplify perf_time__parse_for_ranges() error paths slightly Simplify perf_time__parse_for_ranges() error paths slightly. Signed-off-by: Adrian Hunter Cc: Jin Yao Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190604130017.31207-17-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/time-utils.c | 12 +--- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/tools/perf/util/time-utils.c b/tools/perf/util/time-utils.c index 3e87c21c293c..9a463752dba8 100644 --- a/tools/perf/util/time-utils.c +++ b/tools/perf/util/time-utils.c @@ -403,7 +403,7 @@ int perf_time__parse_for_ranges(const char *time_str, int *range_size, int *range_num) { struct perf_time_interval *ptime_range; - int size, num, ret; + int size, num, ret = -EINVAL; ptime_range = perf_time__range_alloc(time_str, &size); if (!ptime_range) @@ -415,7 +415,6 @@ int perf_time__parse_for_ranges(const char *time_str, pr_err("HINT: no first/last sample time found in perf data.\n" "Please use latest perf binary to execute 'perf record'\n" "(if '--buildid-all' is enabled, please set '--timestamp-boundary').\n"); - ret = -EINVAL; goto error; } @@ -425,11 +424,8 @@ int perf_time__parse_for_ranges(const char *time_str, session->evlist->first_sample_time, session->evlist->last_sample_time); - if (num < 0) { - pr_err("Invalid time string\n"); - ret = -EINVAL; - goto error; - } + if (num < 0) + goto error_invalid; } else { num = 1; } @@ -439,6 +435,8 @@ int perf_time__parse_for_ranges(const char *time_str, *ranges = ptime_range; return 0; +error_invalid: + pr_err("Invalid time string\n"); error: free(ptime_range); return ret;
[tip:perf/core] perf time-utils: Make perf_time__parse_for_ranges() more logical
Commit-ID: 929afa0092d0ea6be2fbd0ac087319092595eba6 Gitweb: https://git.kernel.org/tip/929afa0092d0ea6be2fbd0ac087319092595eba6 Author: Adrian Hunter AuthorDate: Tue, 4 Jun 2019 16:00:15 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Mon, 10 Jun 2019 16:20:12 -0300 perf time-utils: Make perf_time__parse_for_ranges() more logical Explicit time ranges never contain a percent sign whereas percentage ranges always do, so it is possible to call the correct parser. Signed-off-by: Adrian Hunter Cc: Jin Yao Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190604130017.31207-18-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/time-utils.c | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/time-utils.c b/tools/perf/util/time-utils.c index 9a463752dba8..d942840356e3 100644 --- a/tools/perf/util/time-utils.c +++ b/tools/perf/util/time-utils.c @@ -402,6 +402,7 @@ int perf_time__parse_for_ranges(const char *time_str, struct perf_time_interval **ranges, int *range_size, int *range_num) { + bool has_percent = strchr(time_str, '%'); struct perf_time_interval *ptime_range; int size, num, ret = -EINVAL; @@ -409,7 +410,7 @@ int perf_time__parse_for_ranges(const char *time_str, if (!ptime_range) return -ENOMEM; - if (perf_time__parse_str(ptime_range, time_str) != 0) { + if (has_percent) { if (session->evlist->first_sample_time == 0 && session->evlist->last_sample_time == 0) { pr_err("HINT: no first/last sample time found in perf data.\n" @@ -427,6 +428,8 @@ int perf_time__parse_for_ranges(const char *time_str, if (num < 0) goto error_invalid; } else { + if (perf_time__parse_str(ptime_range, time_str)) + goto error_invalid; num = 1; }
[tip:perf/core] perf time-utils: Fix --time documentation
Commit-ID: 0ccc69ba0af46e3371c8cefb506aaf9f0e4f554c Gitweb: https://git.kernel.org/tip/0ccc69ba0af46e3371c8cefb506aaf9f0e4f554c Author: Adrian Hunter AuthorDate: Tue, 4 Jun 2019 16:00:13 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Mon, 10 Jun 2019 16:20:12 -0300 perf time-utils: Fix --time documentation Correct some punctuation and spelling and correct the format to show that the time resolution is nanoseconds not microseconds. Signed-off-by: Adrian Hunter Cc: Jin Yao Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190604130017.31207-16-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-diff.txt | 6 +++--- tools/perf/Documentation/perf-report.txt | 6 +++--- tools/perf/Documentation/perf-script.txt | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/tools/perf/Documentation/perf-diff.txt b/tools/perf/Documentation/perf-diff.txt index da7809b15cc9..5732f69580ab 100644 --- a/tools/perf/Documentation/perf-diff.txt +++ b/tools/perf/Documentation/perf-diff.txt @@ -142,9 +142,9 @@ OPTIONS perf diff --time 0%-10%,30%-40% It also supports analyzing samples within a given time window - ,. Times have the format seconds.microseconds. If 'start' - is not given (i.e., time string is ',x.y') then analysis starts at - the beginning of the file. If stop time is not given (i.e, time + ,. Times have the format seconds.nanoseconds. If 'start' + is not given (i.e. time string is ',x.y') then analysis starts at + the beginning of the file. If stop time is not given (i.e. time string is 'x.y,') then analysis goes to the end of the file. Time string is 'a1.b1,c1.d1:a2.b2,c2.d2'. Use ':' to separate timestamps for different perf.data files. 
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index f441baa794ce..3de029f6881d 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -412,12 +412,12 @@ OPTIONS --time:: Only analyze samples within given time window: ,. Times - have the format seconds.microseconds. If start is not given (i.e., time + have the format seconds.nanoseconds. If start is not given (i.e. time string is ',x.y') then analysis starts at the beginning of the file. If - stop time is not given (i.e, time string is 'x.y,') then analysis goes + stop time is not given (i.e. time string is 'x.y,') then analysis goes to end of file. - Also support time percent with multiple time range. Time string is + Also support time percent with multiple time ranges. Time string is 'a%/n,b%/m,...' or 'a%-b%,c%-%d,...'. For example: diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index c59fd52e9e91..878349cce968 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -361,12 +361,12 @@ include::itrace.txt[] --time:: Only analyze samples within given time window: ,. Times - have the format seconds.microseconds. If start is not given (i.e., time + have the format seconds.nanoseconds. If start is not given (i.e. time string is ',x.y') then analysis starts at the beginning of the file. If - stop time is not given (i.e, time string is 'x.y,') then analysis goes + stop time is not given (i.e. time string is 'x.y,') then analysis goes to end of file. - Also support time percent with multipe time range. Time string is + Also support time percent with multiple time ranges. Time string is 'a%/n,b%/m,...' or 'a%-b%,c%-%d,...'. For example:
[tip:perf/core] perf time-utils: Prevent percentage time range overlap
Commit-ID: b16bfeb3db1b50273e95f539953c337be759500d Gitweb: https://git.kernel.org/tip/b16bfeb3db1b50273e95f539953c337be759500d Author: Adrian Hunter AuthorDate: Tue, 4 Jun 2019 16:00:12 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Mon, 10 Jun 2019 16:20:12 -0300 perf time-utils: Prevent percentage time range overlap Prevent percentage time range overlap. This is only a 1 nanosecond change but makes the results more logical e.g. a sample cannot be in both the first 10% and the second 20%. Note, there is a later patch that adds a test for time-utils. Signed-off-by: Adrian Hunter Cc: Jin Yao Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190604130017.31207-15-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/time-utils.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/util/time-utils.c b/tools/perf/util/time-utils.c index 69441faab3d0..3e87c21c293c 100644 --- a/tools/perf/util/time-utils.c +++ b/tools/perf/util/time-utils.c @@ -148,6 +148,9 @@ static int set_percent_time(struct perf_time_interval *ptime, double start_pcnt, ptime->start = start + round(start_pcnt * total); ptime->end = start + round(end_pcnt * total); + if (ptime->end > ptime->start && ptime->end != end) + ptime->end -= 1; + return 0; }
[tip:perf/core] perf time-utils: Factor out set_percent_time()
Commit-ID: c763242a5e742f8fefda0bb6cfdf6a5a34ae5e10 Gitweb: https://git.kernel.org/tip/c763242a5e742f8fefda0bb6cfdf6a5a34ae5e10 Author: Adrian Hunter AuthorDate: Tue, 4 Jun 2019 16:00:11 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Mon, 10 Jun 2019 16:20:12 -0300 perf time-utils: Factor out set_percent_time() Factor out set_percent_time() so it can be reused. Signed-off-by: Adrian Hunter Cc: Jin Yao Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190604130017.31207-14-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/time-utils.c | 39 ++- 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/tools/perf/util/time-utils.c b/tools/perf/util/time-utils.c index 1d67cf1216c7..69441faab3d0 100644 --- a/tools/perf/util/time-utils.c +++ b/tools/perf/util/time-utils.c @@ -135,12 +135,27 @@ static int parse_percent(double *pcnt, char *str) return 0; } +static int set_percent_time(struct perf_time_interval *ptime, double start_pcnt, + double end_pcnt, u64 start, u64 end) +{ + u64 total = end - start; + + if (start_pcnt < 0.0 || start_pcnt > 1.0 || + end_pcnt < 0.0 || end_pcnt > 1.0) { + return -1; + } + + ptime->start = start + round(start_pcnt * total); + ptime->end = start + round(end_pcnt * total); + + return 0; +} + static int percent_slash_split(char *str, struct perf_time_interval *ptime, u64 start, u64 end) { char *p, *end_str; double pcnt, start_pcnt, end_pcnt; - u64 total = end - start; int i; /* @@ -168,15 +183,7 @@ static int percent_slash_split(char *str, struct perf_time_interval *ptime, start_pcnt = pcnt * (i - 1); end_pcnt = pcnt * i; - if (start_pcnt < 0.0 || start_pcnt > 1.0 || - end_pcnt < 0.0 || end_pcnt > 1.0) { - return -1; - } - - ptime->start = start + round(start_pcnt * total); - ptime->end = start + round(end_pcnt * total); - - return 0; + return set_percent_time(ptime, start_pcnt, end_pcnt, start, end); } static int percent_dash_split(char *str, struct perf_time_interval *ptime, @@ -184,7 +191,6 @@ 
static int percent_dash_split(char *str, struct perf_time_interval *ptime, { char *start_str = NULL, *end_str; double start_pcnt, end_pcnt; - u64 total = end - start; int ret; /* @@ -203,16 +209,7 @@ static int percent_dash_split(char *str, struct perf_time_interval *ptime, free(start_str); - if (start_pcnt < 0.0 || start_pcnt > 1.0 || - end_pcnt < 0.0 || end_pcnt > 1.0 || - start_pcnt > end_pcnt) { - return -1; - } - - ptime->start = start + round(start_pcnt * total); - ptime->end = start + round(end_pcnt * total); - - return 0; + return set_percent_time(ptime, start_pcnt, end_pcnt, start, end); } typedef int (*time_pecent_split)(char *, struct perf_time_interval *,
[tip:perf/core] perf intel-pt: Add support for efficient time interval filtering
Commit-ID: 2c47db90ed71af9c12d5600dbcef864761d76b3d Gitweb: https://git.kernel.org/tip/2c47db90ed71af9c12d5600dbcef864761d76b3d Author: Adrian Hunter AuthorDate: Tue, 4 Jun 2019 16:00:09 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Mon, 10 Jun 2019 16:20:12 -0300 perf intel-pt: Add support for efficient time interval filtering Set up time ranges for efficient time interval filtering using the new "fast forward" facility. Because decoding is done in time order, intel_pt_time_filter() needs to look only at the next start or end timestamp - refer intel_pt_next_time(). Signed-off-by: Adrian Hunter Cc: Jin Yao Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190604130017.31207-12-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/intel-pt.c | 208 + 1 file changed, 208 insertions(+) diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 3e3a01318b76..43ddc78a066e 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -42,6 +42,7 @@ #include "tsc.h" #include "intel-pt.h" #include "config.h" +#include "time-utils.h" #include "intel-pt-decoder/intel-pt-log.h" #include "intel-pt-decoder/intel-pt-decoder.h" @@ -50,6 +51,11 @@ #define MAX_TIMESTAMP (~0ULL) +struct range { + u64 start; + u64 end; +}; + struct intel_pt { struct auxtrace auxtrace; struct auxtrace_queues queues; @@ -118,6 +124,9 @@ struct intel_pt { char *filter; struct addr_filters filts; + + struct range *time_ranges; + unsigned int range_cnt; }; enum switch_state { @@ -154,6 +163,9 @@ struct intel_pt_queue { bool have_sample; u64 time; u64 timestamp; + u64 sel_timestamp; + bool sel_start; + unsigned int sel_idx; u32 flags; u16 insn_len; u64 last_insn_cnt; @@ -1007,6 +1019,23 @@ static void intel_pt_sample_flags(struct intel_pt_queue *ptq) ptq->flags |= PERF_IP_FLAG_TRACE_END; } +static void intel_pt_setup_time_range(struct intel_pt *pt, + struct intel_pt_queue *ptq) +{ + if (!pt->range_cnt) + return; + + ptq->sel_timestamp = 
pt->time_ranges[0].start; + ptq->sel_idx = 0; + + if (ptq->sel_timestamp) { + ptq->sel_start = true; + } else { + ptq->sel_timestamp = pt->time_ranges[0].end; + ptq->sel_start = false; + } +} + static int intel_pt_setup_queue(struct intel_pt *pt, struct auxtrace_queue *queue, unsigned int queue_nr) @@ -1031,6 +1060,8 @@ static int intel_pt_setup_queue(struct intel_pt *pt, ptq->step_through_buffers = true; ptq->sync_switch = pt->sync_switch; + + intel_pt_setup_time_range(pt, ptq); } if (!ptq->on_heap && @@ -1045,6 +1076,14 @@ static int intel_pt_setup_queue(struct intel_pt *pt, intel_pt_log("queue %u getting timestamp\n", queue_nr); intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n", queue_nr, ptq->cpu, ptq->pid, ptq->tid); + + if (ptq->sel_start && ptq->sel_timestamp) { + ret = intel_pt_fast_forward(ptq->decoder, + ptq->sel_timestamp); + if (ret) + return ret; + } + while (1) { state = intel_pt_decode(ptq->decoder); if (state->err) { @@ -1064,6 +1103,9 @@ static int intel_pt_setup_queue(struct intel_pt *pt, queue_nr, ptq->timestamp); ptq->state = state; ptq->have_sample = true; + if (ptq->sel_start && ptq->sel_timestamp && + ptq->timestamp < ptq->sel_timestamp) + ptq->have_sample = false; intel_pt_sample_flags(ptq); ret = auxtrace_heap__add(&pt->heap, queue_nr, ptq->timestamp); if (ret) @@ -1750,10 +1792,83 @@ static void intel_pt_enable_sync_switch(struct intel_pt *pt) } } +/* + * To filter against time ranges, it is only necessary to look at the next start + * or end time. + */ +static bool intel_pt_next_time(struct intel_pt_queue *ptq) +{ + struct intel_pt *pt = ptq->pt; + + if (ptq->sel_start) { + /* Next time is an end time */ + ptq->sel_start = false; + ptq->sel_timestamp = pt->time_ranges[ptq->sel_idx].end; + return true; + } else if (ptq->sel_idx + 1 < pt->range_cnt) { + /* Next time is a start time */ + ptq->sel_start = true; + ptq->sel_idx += 1; + ptq->sel_timestamp = pt->time_ran
[tip:perf/core] perf time-utils: Treat time ranges consistently
Commit-ID: f79a7689d99366aee9f89d785bca6c52ed6b76eb Gitweb: https://git.kernel.org/tip/f79a7689d99366aee9f89d785bca6c52ed6b76eb Author: Adrian Hunter AuthorDate: Tue, 4 Jun 2019 16:00:10 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Mon, 10 Jun 2019 16:20:12 -0300 perf time-utils: Treat time ranges consistently Currently, options allow only 1 explicit (non-percentage) time range. In preparation for adding support for multiple explicit time ranges, treat time ranges consistently. Instead of treating some time ranges as inclusive and some as excluding the end time, treat all time ranges as inclusive. This is only a 1 nanosecond change but is necessary to treat multiple explicit time ranges in a consistent manner. Note, there is a later patch that adds a test for time-utils. Signed-off-by: Adrian Hunter Cc: Jin Yao Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190604130017.31207-13-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/time-utils.c | 7 +++ 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/time-utils.c b/tools/perf/util/time-utils.c index 20663a460df3..1d67cf1216c7 100644 --- a/tools/perf/util/time-utils.c +++ b/tools/perf/util/time-utils.c @@ -389,13 +389,12 @@ bool perf_time__ranges_skip_sample(struct perf_time_interval *ptime_buf, ptime = &ptime_buf[i]; if (timestamp >= ptime->start && - ((timestamp < ptime->end && i < num - 1) || -(timestamp <= ptime->end && i == num - 1))) { - break; + (timestamp <= ptime->end || !ptime->end)) { + return false; } } - return (i == num) ? true : false; + return true; } int perf_time__parse_for_ranges(const char *time_str,
[tip:perf/core] perf intel-pt: Add support for lookahead
Commit-ID: da9000ae35027fb7305b8cad0b37df71937ad578 Gitweb: https://git.kernel.org/tip/da9000ae35027fb7305b8cad0b37df71937ad578 Author: Adrian Hunter AuthorDate: Tue, 4 Jun 2019 16:00:08 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Mon, 10 Jun 2019 16:20:12 -0300 perf intel-pt: Add support for lookahead Implement the lookahead callback to let the decoder access subsequent buffers. intel_pt_lookahead() manages the buffer lifetime and calls the decoder for each buffer until the decoder returns a non-zero value. Signed-off-by: Adrian Hunter Cc: Jin Yao Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190604130017.31207-11-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/intel-pt.c | 59 +- 1 file changed, 58 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 4a61c73c9711..3e3a01318b76 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -278,7 +278,63 @@ static int intel_pt_get_buffer(struct intel_pt_queue *ptq, return 0; } -/* This function assumes data is processed sequentially only */ +/* Do not drop buffers with references - refer intel_pt_get_trace() */ +static void intel_pt_lookahead_drop_buffer(struct intel_pt_queue *ptq, + struct auxtrace_buffer *buffer) +{ + if (!buffer || buffer == ptq->buffer || buffer == ptq->old_buffer) + return; + + auxtrace_buffer__drop_data(buffer); +} + +/* Must be serialized with respect to intel_pt_get_trace() */ +static int intel_pt_lookahead(void *data, intel_pt_lookahead_cb_t cb, + void *cb_data) +{ + struct intel_pt_queue *ptq = data; + struct auxtrace_buffer *buffer = ptq->buffer; + struct auxtrace_buffer *old_buffer = ptq->old_buffer; + struct auxtrace_queue *queue; + int err = 0; + + queue = &ptq->pt->queues.queue_array[ptq->queue_nr]; + + while (1) { + struct intel_pt_buffer b = { .len = 0 }; + + buffer = auxtrace_buffer__next(queue, buffer); + if (!buffer) + break; + + err = intel_pt_get_buffer(ptq, buffer, 
old_buffer, &b); + if (err) + break; + + if (b.len) { + intel_pt_lookahead_drop_buffer(ptq, old_buffer); + old_buffer = buffer; + } else { + intel_pt_lookahead_drop_buffer(ptq, buffer); + continue; + } + + err = cb(&b, cb_data); + if (err) + break; + } + + if (buffer != old_buffer) + intel_pt_lookahead_drop_buffer(ptq, buffer); + intel_pt_lookahead_drop_buffer(ptq, old_buffer); + + return err; +} + +/* + * This function assumes data is processed sequentially only. + * Must be serialized with respect to intel_pt_lookahead() + */ static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data) { struct intel_pt_queue *ptq = data; @@ -827,6 +883,7 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt, params.get_trace = intel_pt_get_trace; params.walk_insn = intel_pt_walk_next_insn; + params.lookahead = intel_pt_lookahead; params.data = ptq; params.return_compression = intel_pt_return_compression(pt); params.branch_enable = intel_pt_branch_enable(pt);
[tip:perf/core] perf intel-pt: Factor out intel_pt_get_buffer()
Commit-ID: e96f7df8807615b96af59f8f8bc6263a7adc27b7 Gitweb: https://git.kernel.org/tip/e96f7df8807615b96af59f8f8bc6263a7adc27b7 Author: Adrian Hunter AuthorDate: Tue, 4 Jun 2019 16:00:07 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Mon, 10 Jun 2019 16:20:12 -0300 perf intel-pt: Factor out intel_pt_get_buffer() Factor out intel_pt_get_buffer() so it can be reused. Signed-off-by: Adrian Hunter Cc: Jin Yao Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190604130017.31207-10-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/intel-pt.c | 60 -- 1 file changed, 37 insertions(+), 23 deletions(-) diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 3cff8fe2eaa0..4a61c73c9711 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -239,32 +239,13 @@ static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer * return 0; } -/* This function assumes data is processed sequentially only */ -static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data) +static int intel_pt_get_buffer(struct intel_pt_queue *ptq, + struct auxtrace_buffer *buffer, + struct auxtrace_buffer *old_buffer, + struct intel_pt_buffer *b) { - struct intel_pt_queue *ptq = data; - struct auxtrace_buffer *buffer = ptq->buffer; - struct auxtrace_buffer *old_buffer = ptq->old_buffer; - struct auxtrace_queue *queue; bool might_overlap; - if (ptq->stop) { - b->len = 0; - return 0; - } - - queue = &ptq->pt->queues.queue_array[ptq->queue_nr]; - - buffer = auxtrace_buffer__next(queue, buffer); - if (!buffer) { - if (old_buffer) - auxtrace_buffer__drop_data(old_buffer); - b->len = 0; - return 0; - } - - ptq->buffer = buffer; - if (!buffer->data) { int fd = perf_data__fd(ptq->pt->session->data); @@ -294,6 +275,39 @@ static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data) b->consecutive = true; } + return 0; +} + +/* This function assumes data is processed sequentially only */ +static int 
intel_pt_get_trace(struct intel_pt_buffer *b, void *data) +{ + struct intel_pt_queue *ptq = data; + struct auxtrace_buffer *buffer = ptq->buffer; + struct auxtrace_buffer *old_buffer = ptq->old_buffer; + struct auxtrace_queue *queue; + int err; + + if (ptq->stop) { + b->len = 0; + return 0; + } + + queue = &ptq->pt->queues.queue_array[ptq->queue_nr]; + + buffer = auxtrace_buffer__next(queue, buffer); + if (!buffer) { + if (old_buffer) + auxtrace_buffer__drop_data(old_buffer); + b->len = 0; + return 0; + } + + ptq->buffer = buffer; + + err = intel_pt_get_buffer(ptq, buffer, old_buffer, b); + if (err) + return err; + if (ptq->step_through_buffers) ptq->stop = true;
[tip:perf/core] perf intel-pt: Add intel_pt_fast_forward()
Commit-ID: a7fa19f5a255cc8970202d5c54092089a01fc042 Gitweb: https://git.kernel.org/tip/a7fa19f5a255cc8970202d5c54092089a01fc042 Author: Adrian Hunter AuthorDate: Tue, 4 Jun 2019 16:00:06 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Mon, 10 Jun 2019 16:20:12 -0300 perf intel-pt: Add intel_pt_fast_forward() Intel PT decoding is done in time order. In order to support efficient time interval filtering, add a facility to "fast forward" towards a particular timestamp. That involves finding the right buffer, stepping to that buffer, and then stepping forward PSBs. Because decoding must begin at a PSB, "fast forward" stops at the last PSB that has a timestamp before the target timestamp. Signed-off-by: Adrian Hunter Cc: Jin Yao Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190604130017.31207-9-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../perf/util/intel-pt-decoder/intel-pt-decoder.c | 130 + .../perf/util/intel-pt-decoder/intel-pt-decoder.h | 2 + 2 files changed, 132 insertions(+) diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index dde6a7a97a7a..c374a856e73f 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -126,6 +126,7 @@ struct intel_pt_decoder { uint64_t timestamp; uint64_t tsc_timestamp; uint64_t ref_timestamp; + uint64_t buf_timestamp; uint64_t sample_timestamp; uint64_t ret_addr; uint64_t ctc_timestamp; @@ -519,6 +520,7 @@ static int intel_pt_get_data(struct intel_pt_decoder *decoder, bool reposition) intel_pt_log("No more data\n"); return -ENODATA; } + decoder->buf_timestamp = buffer.ref_timestamp; if (!buffer.consecutive || reposition) { intel_pt_reposition(decoder); decoder->ref_timestamp = buffer.ref_timestamp; @@ -2854,3 +2856,131 @@ unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a, return buf_b; /* No overlap */ } } + +/** + * struct fast_forward_data - 
data used by intel_pt_ff_cb(). + * @timestamp: timestamp to fast forward towards + * @buf_timestamp: buffer timestamp of last buffer with trace data earlier than + * the fast forward timestamp. + */ +struct fast_forward_data { + uint64_t timestamp; + uint64_t buf_timestamp; +}; + +/** + * intel_pt_ff_cb - fast forward lookahead callback. + * @buffer: Intel PT trace buffer + * @data: opaque pointer to fast forward data (struct fast_forward_data) + * + * Determine if @buffer trace is past the fast forward timestamp. + * + * Return: 1 (stop lookahead) if @buffer trace is past the fast forward + * timestamp, and 0 otherwise. + */ +static int intel_pt_ff_cb(struct intel_pt_buffer *buffer, void *data) +{ + struct fast_forward_data *d = data; + unsigned char *buf; + uint64_t tsc; + size_t rem; + size_t len; + + buf = (unsigned char *)buffer->buf; + len = buffer->len; + + if (!intel_pt_next_psb(&buf, &len) || + !intel_pt_next_tsc(buf, len, &tsc, &rem)) + return 0; + + tsc = intel_pt_8b_tsc(tsc, buffer->ref_timestamp); + + intel_pt_log("Buffer 1st timestamp " x64_fmt " ref timestamp " x64_fmt "\n", +tsc, buffer->ref_timestamp); + + /* +* If the buffer contains a timestamp earlier than the fast forward +* timestamp, then record it, else stop. +*/ + if (tsc < d->timestamp) + d->buf_timestamp = buffer->ref_timestamp; + else + return 1; + + return 0; +} + +/** + * intel_pt_fast_forward - reposition decoder forwards. + * @decoder: Intel PT decoder + * @timestamp: timestamp to fast forward towards + * + * Reposition decoder at the last PSB with a timestamp earlier than @timestamp. + * + * Return: 0 on success or negative error code on failure.
+ */ +int intel_pt_fast_forward(struct intel_pt_decoder *decoder, uint64_t timestamp) +{ + struct fast_forward_data d = { .timestamp = timestamp }; + unsigned char *buf; + size_t len; + int err; + + intel_pt_log("Fast forward towards timestamp " x64_fmt "\n", timestamp); + + /* Find buffer timestamp of buffer to fast forward to */ + err = decoder->lookahead(decoder->data, intel_pt_ff_cb, &d); + if (err < 0) + return err; + + /* Walk to buffer with same buffer timestamp */ + if (d.buf_timestamp) { + do { + decoder->pos += decoder->len; + decoder->len = 0; + err = intel_pt_get_next_data(decoder, true); + /* -ENOLINK means non-consecutive trace */ + if (err && err != -ENOLINK) +
[tip:perf/core] perf intel-pt: Add reposition parameter to intel_pt_get_data()
Commit-ID: 6c1f0b18ac3361837dbe53e794e28096285fb4f0 Gitweb: https://git.kernel.org/tip/6c1f0b18ac3361837dbe53e794e28096285fb4f0 Author: Adrian Hunter AuthorDate: Tue, 4 Jun 2019 16:00:05 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Mon, 10 Jun 2019 16:20:12 -0300 perf intel-pt: Add reposition parameter to intel_pt_get_data() When the decoder gets the next trace buffer, some state is reset if the buffer is not consecutive to the previous buffer. Add a parameter 'reposition' so that can be done also to support a "fast forward" facility. Signed-off-by: Adrian Hunter Cc: Jin Yao Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190604130017.31207-8-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/intel-pt-decoder/intel-pt-decoder.c | 17 + 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 70bff7bb79f3..dde6a7a97a7a 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -502,7 +502,7 @@ static void intel_pt_reposition(struct intel_pt_decoder *decoder) decoder->have_tma = false; } -static int intel_pt_get_data(struct intel_pt_decoder *decoder) +static int intel_pt_get_data(struct intel_pt_decoder *decoder, bool reposition) { struct intel_pt_buffer buffer = { .buf = 0, }; int ret; @@ -519,7 +519,7 @@ static int intel_pt_get_data(struct intel_pt_decoder *decoder) intel_pt_log("No more data\n"); return -ENODATA; } - if (!buffer.consecutive) { + if (!buffer.consecutive || reposition) { intel_pt_reposition(decoder); decoder->ref_timestamp = buffer.ref_timestamp; decoder->state.trace_nr = buffer.trace_nr; @@ -531,10 +531,11 @@ static int intel_pt_get_data(struct intel_pt_decoder *decoder) return 0; } -static int intel_pt_get_next_data(struct intel_pt_decoder *decoder) +static int intel_pt_get_next_data(struct intel_pt_decoder *decoder, + bool 
reposition) { if (!decoder->next_buf) - return intel_pt_get_data(decoder); + return intel_pt_get_data(decoder, reposition); decoder->buf = decoder->next_buf; decoder->len = decoder->next_len; @@ -553,7 +554,7 @@ static int intel_pt_get_split_packet(struct intel_pt_decoder *decoder) len = decoder->len; memcpy(buf, decoder->buf, len); - ret = intel_pt_get_data(decoder); + ret = intel_pt_get_data(decoder, false); if (ret) { decoder->pos += old_len; return ret < 0 ? ret : -EINVAL; @@ -879,7 +880,7 @@ static int intel_pt_get_next_packet(struct intel_pt_decoder *decoder) decoder->len -= decoder->pkt_step; if (!decoder->len) { - ret = intel_pt_get_next_data(decoder); + ret = intel_pt_get_next_data(decoder, false); if (ret) return ret; } @@ -2369,7 +2370,7 @@ static int intel_pt_get_split_psb(struct intel_pt_decoder *decoder, decoder->pos += decoder->len; decoder->len = 0; - ret = intel_pt_get_next_data(decoder); + ret = intel_pt_get_next_data(decoder, false); if (ret) return ret; @@ -2395,7 +2396,7 @@ static int intel_pt_scan_for_psb(struct intel_pt_decoder *decoder) intel_pt_log("Scanning for PSB\n"); while (1) { if (!decoder->len) { - ret = intel_pt_get_next_data(decoder); + ret = intel_pt_get_next_data(decoder, false); if (ret) return ret; }
[tip:perf/core] perf intel-pt: Factor out intel_pt_reposition()
Commit-ID: 6492e5f013d9975d68528150edadead91e97a78a Gitweb: https://git.kernel.org/tip/6492e5f013d9975d68528150edadead91e97a78a Author: Adrian Hunter AuthorDate: Tue, 4 Jun 2019 16:00:04 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Mon, 10 Jun 2019 16:20:12 -0300 perf intel-pt: Factor out intel_pt_reposition() Factor out intel_pt_reposition() so it can be reused. Signed-off-by: Adrian Hunter Cc: Jin Yao Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190604130017.31207-7-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/intel-pt-decoder/intel-pt-decoder.c | 13 + 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index c06dceb774e9..70bff7bb79f3 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -494,6 +494,14 @@ static inline void intel_pt_update_sample_time(struct intel_pt_decoder *decoder) decoder->sample_insn_cnt = decoder->timestamp_insn_cnt; } +static void intel_pt_reposition(struct intel_pt_decoder *decoder) +{ + decoder->ip = 0; + decoder->pkt_state = INTEL_PT_STATE_NO_PSB; + decoder->timestamp = 0; + decoder->have_tma = false; +} + static int intel_pt_get_data(struct intel_pt_decoder *decoder) { struct intel_pt_buffer buffer = { .buf = 0, }; @@ -512,11 +520,8 @@ static int intel_pt_get_data(struct intel_pt_decoder *decoder) return -ENODATA; } if (!buffer.consecutive) { - decoder->ip = 0; - decoder->pkt_state = INTEL_PT_STATE_NO_PSB; + intel_pt_reposition(decoder); decoder->ref_timestamp = buffer.ref_timestamp; - decoder->timestamp = 0; - decoder->have_tma = false; decoder->state.trace_nr = buffer.trace_nr; intel_pt_log("Reference timestamp 0x%" PRIx64 "\n", decoder->ref_timestamp);
[tip:perf/core] perf intel-pt: Factor out intel_pt_8b_tsc()
Commit-ID: e72b52a2cfdea5cb0279b2d63a36d78b8c2134de Gitweb: https://git.kernel.org/tip/e72b52a2cfdea5cb0279b2d63a36d78b8c2134de Author: Adrian Hunter AuthorDate: Tue, 4 Jun 2019 16:00:03 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Mon, 10 Jun 2019 16:20:12 -0300 perf intel-pt: Factor out intel_pt_8b_tsc() Factor out intel_pt_8b_tsc() so it can be reused. Signed-off-by: Adrian Hunter Cc: Jin Yao Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190604130017.31207-6-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../perf/util/intel-pt-decoder/intel-pt-decoder.c | 26 ++ 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 13123b195083..c06dceb774e9 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -1369,6 +1369,21 @@ static int intel_pt_mode_tsx(struct intel_pt_decoder *decoder, bool *no_tip) return 0; } +static uint64_t intel_pt_8b_tsc(uint64_t timestamp, uint64_t ref_timestamp) +{ + timestamp |= (ref_timestamp & (0xffULL << 56)); + + if (timestamp < ref_timestamp) { + if (ref_timestamp - timestamp > (1ULL << 55)) + timestamp += (1ULL << 56); + } else { + if (timestamp - ref_timestamp > (1ULL << 55)) + timestamp -= (1ULL << 56); + } + + return timestamp; +} + static void intel_pt_calc_tsc_timestamp(struct intel_pt_decoder *decoder) { uint64_t timestamp; @@ -1376,15 +1391,8 @@ static void intel_pt_calc_tsc_timestamp(struct intel_pt_decoder *decoder) decoder->have_tma = false; if (decoder->ref_timestamp) { - timestamp = decoder->packet.payload | - (decoder->ref_timestamp & (0xffULL << 56)); - if (timestamp < decoder->ref_timestamp) { - if (decoder->ref_timestamp - timestamp > (1ULL << 55)) - timestamp += (1ULL << 56); - } else { - if (timestamp - decoder->ref_timestamp > (1ULL << 55)) - timestamp -= (1ULL << 56); - } + timestamp = 
intel_pt_8b_tsc(decoder->packet.payload, + decoder->ref_timestamp); decoder->tsc_timestamp = timestamp; decoder->timestamp = timestamp; decoder->ref_timestamp = 0;
[tip:perf/core] perf report: Set perf time interval in itrace_synth_ops
Commit-ID: 4885c90c5e84926cfb083c58d8b6d70d1b1ac7cf Gitweb: https://git.kernel.org/tip/4885c90c5e84926cfb083c58d8b6d70d1b1ac7cf Author: Adrian Hunter AuthorDate: Tue, 4 Jun 2019 16:00:01 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Mon, 10 Jun 2019 16:20:12 -0300 perf report: Set perf time interval in itrace_synth_ops Instruction trace decoders can optimize output based on what time intervals will be filtered, so pass that information in itrace_synth_ops. Signed-off-by: Adrian Hunter Cc: Jin Yao Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190604130017.31207-4-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 1ca533f06a4c..91c40808380d 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -1428,6 +1428,10 @@ repeat: &report.range_num); if (ret < 0) goto error; + + itrace_synth_opts__set_time_range(&itrace_synth_opts, + report.ptime_range, + report.range_num); } if (session->tevent.pevent && @@ -1449,8 +1453,10 @@ repeat: ret = 0; error: - if (report.ptime_range) + if (report.ptime_range) { + itrace_synth_opts__clear_time_range(&itrace_synth_opts); zfree(&report.ptime_range); + } zstd_fini(&(session->zstd_data)); perf_session__delete(session); return ret;
[tip:perf/core] perf intel-pt: Add lookahead callback
Commit-ID: 4d678e9039b075f9418600dc87ec5e61cfb57115 Gitweb: https://git.kernel.org/tip/4d678e9039b075f9418600dc87ec5e61cfb57115 Author: Adrian Hunter AuthorDate: Tue, 4 Jun 2019 16:00:02 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Mon, 10 Jun 2019 16:20:12 -0300 perf intel-pt: Add lookahead callback Add a callback function to enable the decoder to lookahead at subsequent trace buffers. This will be used to implement a "fast forward" facility which will be needed to support efficient time interval filtering. Signed-off-by: Adrian Hunter Cc: Jin Yao Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190604130017.31207-5-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/intel-pt-decoder/intel-pt-decoder.c | 2 ++ tools/perf/util/intel-pt-decoder/intel-pt-decoder.h | 3 +++ 2 files changed, 5 insertions(+) diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 9eb778f9c911..13123b195083 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -104,6 +104,7 @@ struct intel_pt_decoder { uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip, uint64_t max_insn_cnt, void *data); bool (*pgd_ip)(uint64_t ip, void *data); + int (*lookahead)(void *data, intel_pt_lookahead_cb_t cb, void *cb_data); void *data; struct intel_pt_state state; const unsigned char *buf; @@ -233,6 +234,7 @@ struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params) decoder->get_trace = params->get_trace; decoder->walk_insn = params->walk_insn; decoder->pgd_ip = params->pgd_ip; + decoder->lookahead = params->lookahead; decoder->data = params->data; decoder->return_compression = params->return_compression; decoder->branch_enable = params->branch_enable; diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h index 6a61773dc44b..de36254c6ac9 100644 --- 
a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h @@ -102,12 +102,15 @@ struct intel_pt_buffer { uint64_t trace_nr; }; +typedef int (*intel_pt_lookahead_cb_t)(struct intel_pt_buffer *, void *); + struct intel_pt_params { int (*get_trace)(struct intel_pt_buffer *buffer, void *data); int (*walk_insn)(struct intel_pt_insn *intel_pt_insn, uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip, uint64_t max_insn_cnt, void *data); bool (*pgd_ip)(uint64_t ip, void *data); + int (*lookahead)(void *data, intel_pt_lookahead_cb_t cb, void *cb_data); void *data; bool return_compression; bool branch_enable;
[tip:perf/core] perf script: Set perf time interval in itrace_synth_ops
Commit-ID: 400ae9818fe64899cea921a89c7078e0df9e41ea Gitweb: https://git.kernel.org/tip/400ae9818fe64899cea921a89c7078e0df9e41ea Author: Adrian Hunter AuthorDate: Tue, 4 Jun 2019 16:00:00 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Mon, 10 Jun 2019 16:20:11 -0300 perf script: Set perf time interval in itrace_synth_ops Instruction trace decoders can optimize output based on what time intervals will be filtered, so pass that information in itrace_synth_ops. Signed-off-by: Adrian Hunter Cc: Jin Yao Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190604130017.31207-3-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 80c722ade852..61f00055476a 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -3829,6 +3829,10 @@ int cmd_script(int argc, const char **argv) &script.range_num); if (err < 0) goto out_delete; + + itrace_synth_opts__set_time_range(&itrace_synth_opts, + script.ptime_range, + script.range_num); } err = __cmd_script(&script); @@ -3836,8 +3840,10 @@ int cmd_script(int argc, const char **argv) flush_scripting(); out_delete: - if (script.ptime_range) + if (script.ptime_range) { + itrace_synth_opts__clear_time_range(&itrace_synth_opts); zfree(&script.ptime_range); + } perf_evlist__free_stats(session->evlist); perf_session__delete(session);
[tip:perf/core] perf auxtrace: Add perf time interval to itrace_synth_ops
Commit-ID: 33526f362b019f0a17c6b522eb3b07017dba98a7 Gitweb: https://git.kernel.org/tip/33526f362b019f0a17c6b522eb3b07017dba98a7 Author: Adrian Hunter AuthorDate: Tue, 4 Jun 2019 15:59:59 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Mon, 10 Jun 2019 16:20:11 -0300 perf auxtrace: Add perf time interval to itrace_synth_ops Instruction trace decoders can optimize output based on what time intervals will be filtered, so pass that information in itrace_synth_ops. Signed-off-by: Adrian Hunter Cc: Jin Yao Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190604130017.31207-2-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/auxtrace.h | 34 ++ 1 file changed, 34 insertions(+) diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index c69bcd9a3091..c80c58eb7f4d 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -83,6 +83,8 @@ enum itrace_period_type { * @period_type: 'instructions' events period type * @initial_skip: skip N events at the beginning. * @cpu_bitmap: CPUs for which to synthesize events, or NULL for all + * @ptime_range: time intervals to trace or NULL + * @range_num: number of time intervals to trace */ struct itrace_synth_opts { boolset; @@ -107,6 +109,8 @@ struct itrace_synth_opts { enum itrace_period_type period_type; unsigned long initial_skip; unsigned long *cpu_bitmap; + struct perf_time_interval *ptime_range; + int range_num; }; /** @@ -599,6 +603,21 @@ static inline void auxtrace__free(struct perf_session *session) " PERIOD[ns|us|ms|i|t]: specify period to sample stream\n" \ " concatenate multiple options. 
Default is ibxwpe or cewp\n" +static inline +void itrace_synth_opts__set_time_range(struct itrace_synth_opts *opts, + struct perf_time_interval *ptime_range, + int range_num) +{ + opts->ptime_range = ptime_range; + opts->range_num = range_num; +} + +static inline +void itrace_synth_opts__clear_time_range(struct itrace_synth_opts *opts) +{ + opts->ptime_range = NULL; + opts->range_num = 0; +} #else @@ -742,6 +761,21 @@ void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp, #define ITRACE_HELP "" +static inline +void itrace_synth_opts__set_time_range(struct itrace_synth_opts *opts + __maybe_unused, + struct perf_time_interval *ptime_range + __maybe_unused, + int range_num __maybe_unused) +{ +} + +static inline +void itrace_synth_opts__clear_time_range(struct itrace_synth_opts *opts +__maybe_unused) +{ +} + #endif #endif
[tip:perf/core] perf scripts python: exported-sql-viewer.py: Select find text when find bar is activated
Commit-ID: 80b3fb64a55a7e4ba1ef8f9a7e87fbe1a26dc709 Gitweb: https://git.kernel.org/tip/80b3fb64a55a7e4ba1ef8f9a7e87fbe1a26dc709 Author: Adrian Hunter AuthorDate: Mon, 20 May 2019 14:37:28 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Wed, 5 Jun 2019 09:47:58 -0300 perf scripts python: exported-sql-viewer.py: Select find text when find bar is activated The user probably wants to replace the find text, so select the find text when the find bar is activated. That is fairly standard behaviour for search text entry. Entering text will replace the current text, but using edit keys (arrows, home, end etc) cancels the selection and enables editing. Signed-off-by: Adrian Hunter Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190520113728.14389-23-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/exported-sql-viewer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py index 94489cf2ce0e..6e7934f2ac9a 100755 --- a/tools/perf/scripts/python/exported-sql-viewer.py +++ b/tools/perf/scripts/python/exported-sql-viewer.py @@ -400,6 +400,7 @@ class FindBar(): def Activate(self): self.bar.show() + self.textbox.lineEdit().selectAll() self.textbox.setFocus() def Deactivate(self):
[tip:perf/core] perf scripts python: exported-sql-viewer.py: Add IPC information to Call Tree
Commit-ID: b3b660792e049c7ef4a40c4caa7008efd4777b3c Gitweb: https://git.kernel.org/tip/b3b660792e049c7ef4a40c4caa7008efd4777b3c Author: Adrian Hunter AuthorDate: Mon, 20 May 2019 14:37:27 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Wed, 5 Jun 2019 09:47:57 -0300 perf scripts python: exported-sql-viewer.py: Add IPC information to Call Tree Enhance the call tree to display IPC information if it is available. Committer testing: [acme@quaco adrian.hunter]$ python ~acme/libexec/perf-core/scripts/python/exported-sql-viewer.py ~/c/adrian.hunter/simple-retpoline.db Reports -> Call Tree, then expand a few trees, then select with the mouse and press control+C (copy): Call Path ObjectCall Time Time Time(%) Insn Insn Cyc Cyc IPC Branch Branch ▼ simple-retpolin (ns) Cnt Cnt(%) Cnt Cnt(%) Count Count(%) ▼ 23003:23003 ▼ _startld-2.28.so112195670 218295 100.0 127746 100.0 207320 100.0 0.62 13046 100.0 ▶ unknown unknown 112195987 3202 1.5 0 0.0 0 0.00 1 0.0 ▶ _dl_start ld-2.28.so112199189 188471 86.3 123394 96.6 180007 86.8 0.69 12529 96.0 ▼ _dl_initld-2.28.so112387660 13406 6.1 3207 2.5 14868 7.2 0.22 327 2.5 ▶ call_init.part.0 ld-2.28.so112387773117 0.9 70 2.2 639 4.3 0.11 3 0.9 ▶ call_init.part.0 ld-2.28.so112387890 13129 97.9 3103 96.8 14100 94.8 0.22 315 96.3 ▶ call_init.part.0 ld-2.28.so112401020 0 0.0 0 0.0 0 0.00 2 0.6 ▼ _start simple-retpol 112401066 12899 5.9 1142 0.9 11561 5.6 0.10 184 1.4 ▶ unknown unknown 112401388846 6.6 0 0.0 0 0.00 1 0.5 ▼ __libc_start_main libc-2.28.so 112402344 11621 90.1 1129 98.9 10350 89.5 0.11 181 98.4 ▶ __cxa_atexitlibc-2.28.so 112402360 2302 19.8101 8.9 1817 17.6 0.0613 7.2 ▶ __libc_csu_init simple-retpol 112404673121 1.0 43 3.8 340 3.3 0.13 8 4.4 ▶ _setjmp libc-2.28.so 112404794 74 0.6 46 4.1 206 2.0 0.22 4 2.2 ▼ mainsimple-retpol 112404892 44 0.4 23 2.0 126 1.2 0.1812 6.6 ▼ foo simple-retpol 112404892 19 43.2 12 52.2 55 43.7 0.22 5 41.7 bar simple-retpol 112404896 12 63.2 3 25.0 34 61.8 0.09 1 
20.0 ▼ foo simple-retpol 112404911 25 56.8 11 47.8 71 56.3 0.15 5 41.7 ▶ bar simple-retpol 112404924 10 40.0 3 27.3 27 38.0 0.11 1 20.0 ▶ exitlibc-2.28.so 112404936 9029 77.7878 77.8 7765 75.0 0.11 139 76.8 Signed-off-by: Adrian Hunter Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190520113728.14389-22-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/exported-sql-viewer.py | 69 +++- 1 file changed, 56 insertions(+), 13 deletions(-) diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py index f5b1b63995b0..94489cf2ce0e 100755 --- a/tools/perf/scripts/python/exported-sql-viewer.py +++ b/tools/perf/scripts/python/exported-sql-viewer.py @@ -781,11 +781,13 @@ class CallGraphModel(CallGraphModelBase): class CallTreeLevelTwoPlusItemBase(CallGraphLevelItemBase): - def __init__(self, glb, params, row, comm_id, thread_id, calls_id, time, branch_count, parent_item): + def __init__(self, glb, params, row, comm_id, thread_id, calls_id, time, insn_cnt, cyc_cnt, branch_count, parent_item): super(CallTreeLevelTwoPlusItemBase, self).__init__(glb, params, row, parent_item) self.comm_id = comm_id self.thread_id = thread_id self.calls_id = calls_id + self.insn_cnt = insn_cnt + self.cyc_cnt = cyc_cnt self.branch_count = branch_count self.time = time @@ -795,8 +797,12 @@ class CallTreeLevelTwoPlusItemBase(CallGraphLevelItemBase): comm_thread = " AND comm_id = " + str(self.comm_id) + " AND thread_id = " + str(self.thread_id) else: comm_thread = "" + if self.params.have_ipc: + ipc_str = ", insn_count, cyc_count" + else: + ipc_str = "" query = QSqlQuery(self.glb.db) - QueryExec(query, "SELECT calls.id, name, short_name, call_time, return_time - call_time, branch_count" + QueryExec(query, "SELECT call
[tip:perf/core] perf scripts python: exported-sql-viewer.py: Add IPC information to Call Graph Graph
Commit-ID: 38a846d47f3d2fe6783e2df7bc5c2415239e6a63 Gitweb: https://git.kernel.org/tip/38a846d47f3d2fe6783e2df7bc5c2415239e6a63 Author: Adrian Hunter AuthorDate: Mon, 20 May 2019 14:37:26 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Wed, 5 Jun 2019 09:47:57 -0300 perf scripts python: exported-sql-viewer.py: Add IPC information to Call Graph Graph Enhance the call graph to display IPC information if it is available. Committer testing: [acme@quaco adrian.hunter]$ python ~acme/libexec/perf-core/scripts/python/exported-sql-viewer.py ~/c/adrian.hunter/simple-retpoline.db Reports -> Context Sensitive Callgraph, then expand a few trees, then select with the mouse and press control+C: Call Path Object Count Time(ns) Time(%) Insn Insn Cyc CycIPC Branch Branch ▼ simple-retpolinCnt Cnt(%) Cnt Cnt(%) CntCnt(%) ▼ 23003:23003 ▼ _start ld-2.28.so 1 218295 100.0 127746 100.0 207320 100.0 0.62 13046 100.0 ▶ unknown unknown1 3202 1.5 0 0.0 0 0.00 10.0 ▶ _dl_start ld-2.28.so 1 18847186.3 123394 96.6 180007 86.8 0.69 12529 96.0 ▶ _dl_init ld-2.28.so 1 13406 6.13207 2.5 14868 7.2 0.22 3272.5 ▼ _startsimple-retpoline 1 12899 5.91142 0.9 11561 5.6 0.10 1841.4 ▶ unknown unknown1846 6.6 0 0.0 0 0.00 10.5 ▼ __libc_start_main libc-2.28.so 1 1162190.11129 98.9 10350 89.5 0.11 181 98.4 ▶ __cxa_atexit libc-2.28.so 1 230219.8 101 8.9 1817 17.6 0.06137.2 ▶ __libc_csu_init simple-retpoline 1121 1.0 43 3.8 340 3.3 0.13 84.4 ▼ _setjmp libc-2.28.so 1 74 0.6 46 4.1 206 2.0 0.22 42.2 ▼ __sigsetjmp libc-2.28.so 1 74 100.0 46 100.0 206 100.0 0.22 3 75.0 ▶ __sigjmp_save libc-2.28.so 1 0 0.0 0 0.0 0 0.00 1 33.3 ▼ main simple-retpoline 1 44 0.4 23 2.0 126 1.2 0.18126.6 ▼ foo simple-retpoline 2 44 100.0 23 100.0 126 100.0 0.1810 83.3 bar simple-retpoline 2 2250.0 6 26.1 61 48.4 0.10 2 20.0 ▶ exit libc-2.28.so 1 902977.7 878 77.8 7765 75.0 0.11 139 76.8 Signed-off-by: Adrian Hunter Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Link: 
http://lkml.kernel.org/r/20190520113728.14389-21-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/exported-sql-viewer.py | 69 +++- 1 file changed, 56 insertions(+), 13 deletions(-) diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py index b3508bd4eb00..f5b1b63995b0 100755 --- a/tools/perf/scripts/python/exported-sql-viewer.py +++ b/tools/perf/scripts/python/exported-sql-viewer.py @@ -505,18 +505,24 @@ class CallGraphLevelItemBase(object): class CallGraphLevelTwoPlusItemBase(CallGraphLevelItemBase): - def __init__(self, glb, params, row, comm_id, thread_id, call_path_id, time, branch_count, parent_item): + def __init__(self, glb, params, row, comm_id, thread_id, call_path_id, time, insn_cnt, cyc_cnt, branch_count, parent_item): super(CallGraphLevelTwoPlusItemBase, self).__init__(glb, params, row, parent_item) self.comm_id = comm_id self.thread_id = thread_id self.call_path_id = call_path_id + self.insn_cnt = insn_cnt + self.cyc_cnt = cyc_cnt self.branch_count = branch_count self.time = time def Select(self): self.query_done = True; query = QSqlQuery(self.glb.db) - QueryExec(query, "SELECT call_path_id, name, short_name, COUNT(calls.id), SUM(return_time - call_time), SUM(branch_count)" + if self.params.have_ipc: + ipc_str = ", SUM(insn_count), SUM(cyc_count)" + else: + ipc_str = "" + QueryExec(query, "SELECT call_path_id, name, short_name, COUNT(calls.id), SUM(return_time - call_time)" + ipc_str + ", SUM(branch_count)" " FROM calls" " INNER JOIN call_paths ON calls.call_path_id = call_paths.id" " INNER JOIN symbols ON call_paths.symbol_id = symbols.id" @@ -527,7 +533,15 @@ class CallGraphLevel
[tip:perf/core] perf scripts python: exported-sql-viewer.py: Add CallGraphModelParams
Commit-ID: 4a0979d4b4feee67a7f9a5605b5bfae3b0a2b6a9 Gitweb: https://git.kernel.org/tip/4a0979d4b4feee67a7f9a5605b5bfae3b0a2b6a9 Author: Adrian Hunter AuthorDate: Mon, 20 May 2019 14:37:25 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Wed, 5 Jun 2019 09:47:57 -0300 perf scripts python: exported-sql-viewer.py: Add CallGraphModelParams Add a parameter to call graph and call tree, to determine whether IPC information is available. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190520113728.14389-20-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/exported-sql-viewer.py | 73 +--- 1 file changed, 41 insertions(+), 32 deletions(-) diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py index a607235c8cd9..b3508bd4eb00 100755 --- a/tools/perf/scripts/python/exported-sql-viewer.py +++ b/tools/perf/scripts/python/exported-sql-viewer.py @@ -200,9 +200,10 @@ class Thread(QThread): class TreeModel(QAbstractItemModel): - def __init__(self, glb, parent=None): + def __init__(self, glb, params, parent=None): super(TreeModel, self).__init__(parent) self.glb = glb + self.params = params self.root = self.GetRoot() self.last_row_read = 0 @@ -463,8 +464,9 @@ class FindBar(): class CallGraphLevelItemBase(object): - def __init__(self, glb, row, parent_item): + def __init__(self, glb, params, row, parent_item): self.glb = glb + self.params = params self.row = row self.parent_item = parent_item self.query_done = False; @@ -503,8 +505,8 @@ class CallGraphLevelItemBase(object): class CallGraphLevelTwoPlusItemBase(CallGraphLevelItemBase): - def __init__(self, glb, row, comm_id, thread_id, call_path_id, time, branch_count, parent_item): - super(CallGraphLevelTwoPlusItemBase, self).__init__(glb, row, parent_item) + def __init__(self, glb, params, row, comm_id, thread_id, call_path_id, time, branch_count, parent_item): + super(CallGraphLevelTwoPlusItemBase, 
self).__init__(glb, params, row, parent_item) self.comm_id = comm_id self.thread_id = thread_id self.call_path_id = call_path_id @@ -525,7 +527,7 @@ class CallGraphLevelTwoPlusItemBase(CallGraphLevelItemBase): " GROUP BY call_path_id, name, short_name" " ORDER BY call_path_id") while query.next(): - child_item = CallGraphLevelThreeItem(self.glb, self.child_count, self.comm_id, self.thread_id, query.value(0), query.value(1), query.value(2), query.value(3), int(query.value(4)), int(query.value(5)), self) + child_item = CallGraphLevelThreeItem(self.glb, self.params, self.child_count, self.comm_id, self.thread_id, query.value(0), query.value(1), query.value(2), query.value(3), int(query.value(4)), int(query.value(5)), self) self.child_items.append(child_item) self.child_count += 1 @@ -533,8 +535,8 @@ class CallGraphLevelTwoPlusItemBase(CallGraphLevelItemBase): class CallGraphLevelThreeItem(CallGraphLevelTwoPlusItemBase): - def __init__(self, glb, row, comm_id, thread_id, call_path_id, name, dso, count, time, branch_count, parent_item): - super(CallGraphLevelThreeItem, self).__init__(glb, row, comm_id, thread_id, call_path_id, time, branch_count, parent_item) + def __init__(self, glb, params, row, comm_id, thread_id, call_path_id, name, dso, count, time, branch_count, parent_item): + super(CallGraphLevelThreeItem, self).__init__(glb, params, row, comm_id, thread_id, call_path_id, time, branch_count, parent_item) dso = dsoname(dso) self.data = [ name, dso, str(count), str(time), PercentToOneDP(time, parent_item.time), str(branch_count), PercentToOneDP(branch_count, parent_item.branch_count) ] self.dbid = call_path_id @@ -543,8 +545,8 @@ class CallGraphLevelThreeItem(CallGraphLevelTwoPlusItemBase): class CallGraphLevelTwoItem(CallGraphLevelTwoPlusItemBase): - def __init__(self, glb, row, comm_id, thread_id, pid, tid, parent_item): - super(CallGraphLevelTwoItem, self).__init__(glb, row, comm_id, thread_id, 1, 0, 0, parent_item) + def __init__(self, glb, params, row, 
comm_id, thread_id, pid, tid, parent_item): + super(CallGraphLevelTwoItem, self).__init__(glb, params, row, comm_id, thread_id, 1, 0, 0, parent_item) self.data = [str(pid) + ":" + str(tid), "", "", "", "", "", ""] self.dbid = thread_id @@ -561,8 +563,8 @@ class CallGraphLevelTwoItem(CallGraphLevelTw
[tip:perf/core] perf scripts python: exported-sql-viewer.py: Add IPC information to the Branch reports
Commit-ID: 530e22fd5c6d2c572b1bbdda23eafa01a005fce0 Gitweb: https://git.kernel.org/tip/530e22fd5c6d2c572b1bbdda23eafa01a005fce0 Author: Adrian Hunter AuthorDate: Mon, 20 May 2019 14:37:24 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Wed, 5 Jun 2019 09:47:57 -0300 perf scripts python: exported-sql-viewer.py: Add IPC information to the Branch reports Enhance the "All branches" and "Selected branches" reports to display IPC information if it is available. Committer testing: So, testing this I noticed that it all starts with the left arrow in every line, that should mean there is some tree there, i.e. look at all those ▶ symbols: Reports -> All Branches: Time CPU Command PID TID Branch Type In Tx Insn Cnt Cyc Cnt IPC Branch ▶ 187836112195670 7 simple-retpolin 23003 23003 trace begin No 0 00 0 unknown (unknown) -> 7f6f33d4f110 +_start (ld-2.28.so) ▶ 187836112195987 7 simple-retpolin 23003 23003 trace endNo 0 883 07f6f33d4f110 _start (ld-2.28.so) -> 0 unknown +(unknown) ▶ 187836112199189 7 simple-retpolin 23003 23003 trace begin No 0 00 0 unknown (unknown) -> 7f6f33d4f110 +_start (ld-2.28.so) ▶ 187836112199189 7 simple-retpolin 23003 23003 call No 0 007f6f33d4f113 _start+0x3 (ld-2.28.so) -> 7f6f33d4ff50 +_dl_start (ld-2.28.so) ▶ 187836112199544 7 simple-retpolin 23003 23003 trace endNo 17 996 0.02 7f6f33d4ff73 _dl_start+0x23 (ld-2.28.so) -> 0 +unknown (unknown) ▶ 187836112200939 7 simple-retpolin 23003 23003 trace begin No 0 00 0 unknown (unknown) -> 7f6f33d4ff73 +_dl_start+0x23 (ld-2.28.so) ▶ 187836112201229 7 simple-retpolin 23003 23003 trace endNo 1 816 0.00 7f6f33d4ff7a _dl_start+0x2a (ld-2.28.so) -> 0 +unknown (unknown) ▶ 187836112203500 7 simple-retpolin 23003 23003 trace begin No 0 00 0 unknown (unknown) -> 7f6f33d4ff7a +_dl_start+0x2a (ld-2.28.so) But if you click on it, that ▶ disappears and a new click doesn't make it reappear, looks buggy, minor oddity, reported to Adrian. 
Reports -> Selected Branches, then ask for branches in the ld-2.28.so DSO: Time CPU Command PIDTIDBranch TypeIn Tx Insn Cnt Cyc Cnt IPC Branch ▶ 187836112195987 7simple-retpolin 23003 23003 trace end No 0 883 0 7f6f33d4f110 _start (ld-2.28.so) -> 0 unknown (unknown) ▶ 187836112199189 7simple-retpolin 23003 23003 trace beginNo 0 000 unknown (unknown) -> 7f6f33d4f110 _start (ld-2.28.so) ▶ 187836112199189 7simple-retpolin 23003 23003 call No 0 00 7f6f33d4f113 _start+0x3 (ld-2.28.so) -> 7f6f33d4ff50 _dl_start (ld-2.28.so) ▶ 187836112199544 7simple-retpolin 23003 23003 trace end No 17996 0.02 7f6f33d4ff73 _dl_start+0x23 (ld-2.28.so) -> 0 unknown (unknown) ▶ 187836112200939 7simple-retpolin 23003 23003 trace beginNo 0 000 unknown (unknown) -> 7f6f33d4ff73 _dl_start+0x23 (ld-2.28.so) ▶ 187836112201229 7simple-retpolin 23003 23003 trace end No 1 816 0.00 7f6f33d4ff7a _dl_start+0x2a (ld-2.28.so) -> 0 unknown (unknown) ▶ 187836112203500 7simple-retpolin 23003 23003 trace beginNo 0 000 unknown (unknown) -> 7f6f33d4ff7a _dl_start+0x2a (ld-2.28.so) ▶ 187836112203528 7simple-retpolin 23003 23003 unconditional jump No 0 00 7f6f33d4ffe7 _dl_start+0x97 (ld-2.28.so) -> 7f6f33d5000b _dl_start+0xbb (ld-2.28.so) ▶ 187836112203528 7simple-retpolin 23003 23003 conditional jump No 0 00 7f6f33d5000f _dl_start+0xbf (ld-2.28.so) -> 7f6f33d4fffb _dl_start+0xab (ld-2.28.so) ▶ 187836112203528 7simple-retpolin 23003 23003 conditional jump No 0 00 7f6f33d5000f _dl_start+0xbf (ld-2.28.so) -> 7f6f33d4fffb _dl_start+0xab (ld-2.28.so) ▶ 187836112203539 7simple-retpolin 23003 23003 conditional jump No 0 00 7f6f33d50025 _dl_start+0xd5 (ld-2.28.so) -> 7f6f33d50210 _dl_start+0x2c0 (ld-2.28.so) ▶ 187836112203539 7simple-retpolin 23003 23003 conditional jump No 0 00 7f6f33d5021a _dl_start+0x2ca (ld-2.28.so) -> 7f6f33d50360 _dl_start+0x410 (ld-2.28.so) ▶ 187836112203539 7simple-retpolin 23003 23003 unconditional jump No 0 00 7f6f33d50377 _dl_start+0x427 (ld-2.28.so) -> 7f6f33d4ffff 
_dl_start+0xaf (ld-2.28.so) ▶ 187836112203539 7simple-retpolin 23003 23003 conditional jump No 0 00 7f6f33d5000f _dl_start+0xbf (ld-2.28.so) -> 7f6f33d4fffb _dl_start+0xab
[tip:perf/core] perf scripts python: export-to-postgresql.py: Export IPC information
Commit-ID: ec7f448e2b2e13d1629300c5881cb3b5e0a99c2f Gitweb: https://git.kernel.org/tip/ec7f448e2b2e13d1629300c5881cb3b5e0a99c2f Author: Adrian Hunter AuthorDate: Mon, 20 May 2019 14:37:23 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Wed, 5 Jun 2019 09:47:57 -0300 perf scripts python: export-to-postgresql.py: Export IPC information Export cycle and instruction counts on samples and calls tables. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190520113728.14389-18-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/export-to-postgresql.py | 36 +++ 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/tools/perf/scripts/python/export-to-postgresql.py b/tools/perf/scripts/python/export-to-postgresql.py index b2f481b0d28d..93225c02117e 100644 --- a/tools/perf/scripts/python/export-to-postgresql.py +++ b/tools/perf/scripts/python/export-to-postgresql.py @@ -394,7 +394,9 @@ if branches: 'to_ip bigint,' 'branch_typeinteger,' 'in_tx boolean,' - 'call_path_id bigint)') + 'call_path_id bigint,' + 'insn_count bigint,' + 'cyc_count bigint)') else: do_query(query, 'CREATE TABLE samples (' 'id bigint NOT NULL,' @@ -418,7 +420,9 @@ else: 'data_src bigint,' 'branch_typeinteger,' 'in_tx boolean,' - 'call_path_id bigint)') + 'call_path_id bigint,' + 'insn_count bigint,' + 'cyc_count bigint)') if perf_db_export_calls or perf_db_export_callchains: do_query(query, 'CREATE TABLE call_paths (' @@ -439,7 +443,9 @@ if perf_db_export_calls: 'return_id bigint,' 'parent_call_path_idbigint,' 'flags integer,' - 'parent_id bigint)') + 'parent_id bigint,' + 'insn_count bigint,' + 'cyc_count bigint)') do_query(query, 'CREATE VIEW machines_view AS ' 'SELECT ' @@ -521,6 +527,9 @@ if perf_db_export_calls: 'return_time,' 'return_time - call_time AS elapsed_time,' 'branch_count,' + 'insn_count,' + 'cyc_count,' + 'CASE WHEN cyc_count=0 THEN CAST(0 AS NUMERIC(20, 2)) ELSE CAST((CAST(insn_count AS FLOAT) / cyc_count) 
AS NUMERIC(20, 2)) END AS IPC,' 'call_id,' 'return_id,' 'CASE WHEN flags=0 THEN \'\' WHEN flags=1 THEN \'no call\' WHEN flags=2 THEN \'no return\' WHEN flags=3 THEN \'no call/return\' WHEN flags=6 THEN \'jump\' ELSE CAST ( flags AS VARCHAR(6) ) END AS flags,' @@ -546,7 +555,10 @@ do_query(query, 'CREATE VIEW samples_view AS ' 'to_sym_offset,' '(SELECT short_name FROM dsos WHERE id = to_dso_id) AS to_dso_short_name,' '(SELECT name FROM branch_types WHERE id = branch_type) AS branch_type_name,' - 'in_tx' + 'in_tx,' + 'insn_count,' + 'cyc_count,' + 'CASE WHEN cyc_count=0 THEN CAST(0 AS NUMERIC(20, 2)) ELSE CAST((CAST(insn_count AS FLOAT) / cyc_count) AS NUMERIC(20, 2)) END AS IPC' ' FROM samples') @@ -618,10 +630,10 @@ def trace_begin(): comm_table(0, "unknown") dso_table(0, 0, "unknown", "unknown", "") symbol_table(0, 0, 0, 0, 0, "unknown") - sample_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) + sample_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) if perf_db_export_calls or perf_db_export_callchains: call_path_table(0, 0, 0, 0) - call_return_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) + call_return_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) unhandled_count = 0 @@ -772,11 +784,11 @@ def branch_type_table(branch_type, name, *x): value = struct.pack(fmt, 2, 4, branch_type, n, name) branch_type_file.write(value) -def sample_table(sample_id, evsel_id, machine_id, thread_id, comm_id, dso_id, symbol_id, sym_offset, ip, time, cpu, to_dso_id, to_symbol_id, to_sym_offset, to_ip, period, weight, transaction, data_src, branch_type, in_tx, call_path_id, *x): +def sample_table(sample_id, evsel_id, machine_id, thread_id, comm_id, dso_id, symbol_id, sym_offset, ip, time, cpu, to_dso_id, to_symbol_id, to_sym_offset, to_ip, period, weight, transaction, data_src, branch_type, in_tx, call_path_id, insn_cnt, cyc_cnt, *x): if branches: - value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiiiBiq", 18, 8, sample_id
[tip:perf/core] perf scripts python: export-to-sqlite.py: Export IPC information
Commit-ID: 64adadb3f9dbaaae3d14ea75fa71a3b877cbe82e Gitweb: https://git.kernel.org/tip/64adadb3f9dbaaae3d14ea75fa71a3b877cbe82e Author: Adrian Hunter AuthorDate: Mon, 20 May 2019 14:37:22 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Wed, 5 Jun 2019 09:47:57 -0300 perf scripts python: export-to-sqlite.py: Export IPC information Export cycle and instruction counts on samples and calls tables. Committer testing: First run some workload collecting intel_pt with the 'cyc' term just for userspace: [root@quaco adrian.hunter]# perf record -o simple-retpoline.perf.data -e intel_pt/cyc/u ./simple-retpoline [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.035 MB simple-retpoline.perf.data ] [root@quaco adrian.hunter]# Then use the export-to-sqlite.py script to check that the changes in this cset don't break it and that the changes in the db schema are the ones expected: [root@quaco adrian.hunter]# perf script -i simple-retpoline.perf.data --itrace=be -s ~acme/libexec/perf-core/scripts/python/export-to-sqlite.py simple-retpoline.db branches calls 2019-05-31 11:50:46.942710 Creating database ... 2019-05-31 11:50:46.949663 Writing records... 2019-05-31 11:50:47.224033 Adding indexes 2019-05-31 11:50:47.231599 Done [root@quaco adrian.hunter]# Now let's use the db: [root@quaco adrian.hunter]# sqlite3 simple-retpoline.db SQLite version 3.26.0 2018-12-01 12:34:55 Enter ".help" for usage hints. 
sqlite> .schema samples CREATE TABLE samples (id integer NOT NULL PRIMARY KEY,evsel_id bigint,machine_id bigint,thread_id bigint,comm_id bigint,dso_id bigint,symbol_id bigint,sym_offset bigint,ip bigint,time bigint,cpuinteger,to_dso_id bigint,to_symbol_id bigint,to_sym_offset bigint,to_ip bigint,branch_type integer,in_tx boolean,call_path_id bigint,insn_count bigint,cyc_count bigint); sqlite> Cool, the 'insn_count' and 'cyc_count' are there, now lets see if we can use them in a query: sqlite> select insn_count,cyc_count from samples where cyc_count > 1500 and insn_count < 10; 6|1507 sqlite> select insn_count,cyc_count from samples where cyc_count > 1500; 118|2210 140|1516 3783|1861 132|1521 6|1507 sqlite> Seems to work :-) Signed-off-by: Adrian Hunter Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190520113728.14389-17-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/export-to-sqlite.py | 36 ++- 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/tools/perf/scripts/python/export-to-sqlite.py b/tools/perf/scripts/python/export-to-sqlite.py index f617e518332f..4542ce89034b 100644 --- a/tools/perf/scripts/python/export-to-sqlite.py +++ b/tools/perf/scripts/python/export-to-sqlite.py @@ -218,7 +218,9 @@ if branches: 'to_ip bigint,' 'branch_typeinteger,' 'in_tx boolean,' - 'call_path_id bigint)') + 'call_path_id bigint,' + 'insn_count bigint,' + 'cyc_count bigint)') else: do_query(query, 'CREATE TABLE samples (' 'id integer NOT NULLPRIMARY KEY,' @@ -242,7 +244,9 @@ else: 'data_src bigint,' 'branch_typeinteger,' 'in_tx boolean,' - 'call_path_id bigint)') + 'call_path_id bigint,' + 'insn_count bigint,' + 'cyc_count bigint)') if perf_db_export_calls or perf_db_export_callchains: do_query(query, 'CREATE TABLE call_paths (' @@ -263,7 +267,9 @@ if perf_db_export_calls: 'return_id bigint,' 'parent_call_path_idbigint,' 'flags integer,' - 'parent_id bigint)') + 'parent_id bigint,' + 
'insn_count bigint,' + 'cyc_count bigint)') # printf was added to sqlite in version 3.8.3 sqlite_has_printf = False @@ -359,6 +365,9 @@ if perf_db_export_calls: 'return_time,' 'return_time - call_time AS elapsed_time,' 'branch_count,' + 'insn_count,' + 'cyc_count,' + 'CASE WHEN cyc_count=0 THEN CAST(0 AS FLOAT) ELSE ROUND(CAST(insn_count AS FLOAT) / cyc_count, 2) END AS IPC,' 'call_id,' 'return_id,' 'CASE WHEN flags=0 THEN \'\' WHEN flags=1 THEN \'no call\' WHEN flags=2 THEN \'no return\' WHEN flags=3 THEN \'no call/return\' WHEN flags=6 THEN \'jump\' ELSE flags END AS flags,' @@ -384,7 +393,10 @@ do_query(query, 'CREATE VIEW samples_view AS ' 'to_sym_offset,' '(SELECT short_name FROM dsos WHERE id = to_dso
[tip:perf/core] perf db-export: Export IPC information
Commit-ID: 52a2ab6fa99df9288f2c8c7f461b815550b9b366 Gitweb: https://git.kernel.org/tip/52a2ab6fa99df9288f2c8c7f461b815550b9b366 Author: Adrian Hunter AuthorDate: Mon, 20 May 2019 14:37:21 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Wed, 5 Jun 2019 09:47:57 -0300 perf db-export: Export IPC information Export cycle and instruction counts on samples and call-returns. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190520113728.14389-16-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/scripting-engines/trace-event-python.c | 8 ++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 22f52b669871..6acb379b53ec 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -1111,7 +1111,7 @@ static int python_export_sample(struct db_export *dbe, struct tables *tables = container_of(dbe, struct tables, dbe); PyObject *t; - t = tuple_new(22); + t = tuple_new(24); tuple_set_u64(t, 0, es->db_id); tuple_set_u64(t, 1, es->evsel->db_id); @@ -1135,6 +1135,8 @@ static int python_export_sample(struct db_export *dbe, tuple_set_s32(t, 19, es->sample->flags & PERF_BRANCH_MASK); tuple_set_s32(t, 20, !!(es->sample->flags & PERF_IP_FLAG_IN_TX)); tuple_set_u64(t, 21, es->call_path_id); + tuple_set_u64(t, 22, es->sample->insn_cnt); + tuple_set_u64(t, 23, es->sample->cyc_cnt); call_object(tables->sample_handler, t, "sample_table"); @@ -1173,7 +1175,7 @@ static int python_export_call_return(struct db_export *dbe, u64 comm_db_id = cr->comm ? 
cr->comm->db_id : 0; PyObject *t; - t = tuple_new(12); + t = tuple_new(14); tuple_set_u64(t, 0, cr->db_id); tuple_set_u64(t, 1, cr->thread->db_id); @@ -1187,6 +1189,8 @@ static int python_export_call_return(struct db_export *dbe, tuple_set_u64(t, 9, cr->cp->parent->db_id); tuple_set_s32(t, 10, cr->flags); tuple_set_u64(t, 11, cr->parent_db_id); + tuple_set_u64(t, 12, cr->insn_count); + tuple_set_u64(t, 13, cr->cyc_count); call_object(tables->call_return_handler, t, "call_return_table");
[tip:perf/core] perf db-export: Add brief documentation
Commit-ID: 1159facee97fe184a434db3086604c7572fd7dfa Gitweb: https://git.kernel.org/tip/1159facee97fe184a434db3086604c7572fd7dfa Author: Adrian Hunter AuthorDate: Mon, 20 May 2019 14:37:20 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Wed, 5 Jun 2019 09:47:57 -0300 perf db-export: Add brief documentation Add brief documentation to explain how the database export maintains backward and forward compatibility. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190520113728.14389-15-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/db-export.txt | 41 ++ 1 file changed, 41 insertions(+) diff --git a/tools/perf/Documentation/db-export.txt b/tools/perf/Documentation/db-export.txt new file mode 100644 index 000000000000..52ffccb02d55 --- /dev/null +++ b/tools/perf/Documentation/db-export.txt @@ -0,0 +1,41 @@ +Database Export +=============== + +perf tool's python scripting engine: + + tools/perf/util/scripting-engines/trace-event-python.c + +supports scripts: + + tools/perf/scripts/python/export-to-sqlite.py + tools/perf/scripts/python/export-to-postgresql.py + +which export data to a SQLite3 or PostgreSQL database. + +The export process provides records with unique sequential ids which allows the +data to be imported directly to a database and provides the relationships +between tables. + +Over time it is possible to continue to expand the export while maintaining +backward and forward compatibility, by following some simple rules: + +1. Because of the nature of SQL, existing tables and columns can continue to be +used so long as the names and meanings (and to some extent data types) remain +the same. + +2. New tables and columns can be added, without affecting existing SQL queries, +so long as the new names are unique. + +3. Scripts that use a database (e.g. exported-sql-viewer.py) can maintain +backward compatibility by testing for the presence of new tables and columns +before using them. e.g. 
function IsSelectable() in exported-sql-viewer.py + +4. The export scripts themselves maintain forward compatibility (i.e. an existing +script will continue to work with new versions of perf) by accepting a variable +number of arguments (e.g. def call_return_table(*x)) i.e. perf can pass more +arguments which old scripts will ignore. + +5. The scripting engine tests for the existence of script handler functions +before calling them. The scripting engine can also test for the support of new +or optional features by checking for the existence and value of script global +variables.
[tip:perf/core] perf thread-stack: Accumulate IPC information
Commit-ID: 003ccdc7165accee073ce261fc670f64cc98d0f7 Gitweb: https://git.kernel.org/tip/003ccdc7165accee073ce261fc670f64cc98d0f7 Author: Adrian Hunter AuthorDate: Mon, 20 May 2019 14:37:19 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Wed, 5 Jun 2019 09:47:57 -0300 perf thread-stack: Accumulate IPC information Cycle and instruction counts are added to the stack. The IPC of a function and all functions it calls, is also recorded. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190520113728.14389-14-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/thread-stack.c | 14 ++ tools/perf/util/thread-stack.h | 4 2 files changed, 18 insertions(+) diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c index 41942c2aaa18..8e390f78486f 100644 --- a/tools/perf/util/thread-stack.c +++ b/tools/perf/util/thread-stack.c @@ -49,6 +49,8 @@ enum retpoline_state_t { * @timestamp: timestamp (if known) * @ref: external reference (e.g. 
db_id of sample) * @branch_count: the branch count when the entry was created + * @insn_count: the instruction count when the entry was created + * @cyc_count: the cycle count when the entry was created * @db_id: id used for db-export * @cp: call path * @no_call: a 'call' was not seen @@ -60,6 +62,8 @@ struct thread_stack_entry { u64 timestamp; u64 ref; u64 branch_count; + u64 insn_count; + u64 cyc_count; u64 db_id; struct call_path *cp; bool no_call; @@ -75,6 +79,8 @@ struct thread_stack_entry { * @sz: current maximum stack size * @trace_nr: current trace number * @branch_count: running branch count + * @insn_count: running instruction count + * @cyc_count: running cycle count * @kernel_start: kernel start address * @last_time: last timestamp * @crp: call/return processor @@ -88,6 +94,8 @@ struct thread_stack { size_t sz; u64 trace_nr; u64 branch_count; + u64 insn_count; + u64 cyc_count; u64 kernel_start; u64 last_time; struct call_return_processor *crp; @@ -289,6 +297,8 @@ static int thread_stack__call_return(struct thread *thread, cr.call_time = tse->timestamp; cr.return_time = timestamp; cr.branch_count = ts->branch_count - tse->branch_count; + cr.insn_count = ts->insn_count - tse->insn_count; + cr.cyc_count = ts->cyc_count - tse->cyc_count; cr.db_id = tse->db_id; cr.call_ref = tse->ref; cr.return_ref = ref; @@ -544,6 +554,8 @@ static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr, tse->timestamp = timestamp; tse->ref = ref; tse->branch_count = ts->branch_count; + tse->insn_count = ts->insn_count; + tse->cyc_count = ts->cyc_count; tse->cp = cp; tse->no_call = no_call; tse->trace_end = trace_end; @@ -874,6 +886,8 @@ int thread_stack__process(struct thread *thread, struct comm *comm, } ts->branch_count += 1; + ts->insn_count += sample->insn_cnt; + ts->cyc_count += sample->cyc_cnt; ts->last_time = sample->time; if (sample->flags & PERF_IP_FLAG_CALL) { diff --git a/tools/perf/util/thread-stack.h b/tools/perf/util/thread-stack.h index
9c45f947f5a9..bddb1daf6453 100644 --- a/tools/perf/util/thread-stack.h +++ b/tools/perf/util/thread-stack.h @@ -52,6 +52,8 @@ enum { * @call_time: timestamp of call (if known) * @return_time: timestamp of return (if known) * @branch_count: number of branches seen between call and return + * @insn_count: approx. number of instructions between call and return + * @cyc_count: approx. number of cycles between call and return * @call_ref: external reference to 'call' sample (e.g. db_id) * @return_ref: external reference to 'return' sample (e.g. db_id) * @db_id: id used for db-export @@ -65,6 +67,8 @@ struct call_return { u64 call_time; u64 return_time; u64 branch_count; + u64 insn_count; + u64 cyc_count; u64 call_ref; u64 return_ref; u64 db_id;
[tip:perf/core] perf intel-pt: Document IPC usage
Commit-ID: 5db47f43ccbbdee8c48f76ace4c287187a28b87f Gitweb: https://git.kernel.org/tip/5db47f43ccbbdee8c48f76ace4c287187a28b87f Author: Adrian Hunter AuthorDate: Mon, 20 May 2019 14:37:18 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Wed, 5 Jun 2019 09:47:57 -0300 perf intel-pt: Document IPC usage Add brief documentation about instructions-per-cycle (IPC) information derived from Intel PT. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190520113728.14389-13-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/intel-pt.txt | 30 ++ 1 file changed, 30 insertions(+) diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt index 60d99e5e7921..50c5b60101bd 100644 --- a/tools/perf/Documentation/intel-pt.txt +++ b/tools/perf/Documentation/intel-pt.txt @@ -103,6 +103,36 @@ The flags are "bcrosyiABEx" which stand for branch, call, return, conditional, system, asynchronous, interrupt, transaction abort, trace begin, trace end, and in transaction, respectively. +Another interesting field that is not printed by default is 'ipc' which can be +displayed as follows: + + perf script --itrace=be -F+ipc + +There are two ways that instructions-per-cycle (IPC) can be calculated depending +on the recording. + +If the 'cyc' config term (see config terms section below) was used, then IPC is +calculated using the cycle count from CYC packets, otherwise MTC packets are +used - refer to the 'mtc' config term. When MTC is used, however, the values +are less accurate because the timing is less accurate. + +Because Intel PT does not update the cycle count on every branch or instruction, +the values will often be zero. When there are values, they will be the number +of instructions and number of cycles since the last update, and thus represent +the average IPC since the last IPC for that event type. 
Note IPC for "branches" +events is calculated separately from IPC for "instructions" events. + +Also note that the IPC instruction count may or may not include the current +instruction. If the cycle count is associated with an asynchronous branch +(e.g. page fault or interrupt), then the instruction count does not include the +current instruction, otherwise it does. That is consistent with whether or not +that instruction has retired when the cycle count is updated. + +Another note, in the case of "branches" events, non-taken branches are not +presently sampled, so IPC values for them do not appear e.g. a CYC packet with a +TNT packet that starts with a non-taken branch. To see every possible IPC +value, "instructions" events can be used e.g. --itrace=i0ns + While it is possible to create scripts to analyze the data, an alternative approach is available to export the data to a sqlite or postgresql database. Refer to script export-to-sqlite.py or export-to-postgresql.py for more details,
[tip:perf/core] perf intel-pt: Re-factor TIP cases in intel_pt_walk_to_ip
Commit-ID: f3c98c4b5ac831f29b1cc19fa84d3c8401f846d6 Gitweb: https://git.kernel.org/tip/f3c98c4b5ac831f29b1cc19fa84d3c8401f846d6 Author: Adrian Hunter AuthorDate: Mon, 20 May 2019 14:37:16 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Wed, 5 Jun 2019 09:47:56 -0300 perf intel-pt: Re-factor TIP cases in intel_pt_walk_to_ip To make it easier to add new code for different TIP cases, separate each case. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190520113728.14389-11-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../perf/util/intel-pt-decoder/intel-pt-decoder.c | 23 -- 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index a2384a314990..99773445872d 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -2128,18 +2128,29 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder) switch (decoder->packet.type) { case INTEL_PT_TIP_PGD: decoder->continuous_period = false; - __fallthrough; + decoder->pge = false; + if (intel_pt_have_ip(decoder)) + intel_pt_set_ip(decoder); + if (!decoder->ip) + break; + decoder->state.type |= INTEL_PT_TRACE_END; + return 0; + case INTEL_PT_TIP_PGE: + decoder->pge = true; + if (intel_pt_have_ip(decoder)) + intel_pt_set_ip(decoder); + if (!decoder->ip) + break; + decoder->state.type |= INTEL_PT_TRACE_BEGIN; + return 0; + case INTEL_PT_TIP: - decoder->pge = decoder->packet.type != INTEL_PT_TIP_PGD; + decoder->pge = true; if (intel_pt_have_ip(decoder)) intel_pt_set_ip(decoder); if (!decoder->ip) break; - if (decoder->packet.type == INTEL_PT_TIP_PGE) - decoder->state.type |= INTEL_PT_TRACE_BEGIN; - if (decoder->packet.type == INTEL_PT_TIP_PGD) - decoder->state.type |= INTEL_PT_TRACE_END; return 0; case INTEL_PT_FUP:
[tip:perf/core] perf script: Add output of IPC ratio
Commit-ID: 68fb45bf175e702aec6668c776050e5dbd2a6f1f Gitweb: https://git.kernel.org/tip/68fb45bf175e702aec6668c776050e5dbd2a6f1f Author: Adrian Hunter AuthorDate: Mon, 20 May 2019 14:37:14 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Wed, 5 Jun 2019 09:47:56 -0300 perf script: Add output of IPC ratio Add field 'ipc' to display instructions-per-cycle. Example: perf record -e intel_pt/cyc/u ls perf script --insn-trace --xed -F+ipc,-dso,-cpu,-tid ls 2670177.697113434: 7f0dfdbcd090 _start+0x0 mov %rsp, %rdi IPC: 0.00 (1/877) ls 2670177.697113434: 7f0dfdbcd093 _start+0x3 callq 0x7f0dfdbce030 ls 2670177.697113434: 7f0dfdbce030 _dl_start+0x0 pushq %rbp ls 2670177.697113434: 7f0dfdbce031 _dl_start+0x1 mov %rsp, %rbp ls 2670177.697113434: 7f0dfdbce034 _dl_start+0x4 pushq %r15 ls 2670177.697113434: 7f0dfdbce036 _dl_start+0x6 pushq %r14 ls 2670177.697113434: 7f0dfdbce038 _dl_start+0x8 pushq %r13 ls 2670177.697113434: 7f0dfdbce03a _dl_start+0xa pushq %r12 ls 2670177.697113434: 7f0dfdbce03c _dl_start+0xc mov %rdi, %r12 ls 2670177.697113434: 7f0dfdbce03f _dl_start+0xf pushq %rbx ls 2670177.697113434: 7f0dfdbce040 _dl_start+0x10 sub $0x38, %rsp ls 2670177.697113434: 7f0dfdbce044 _dl_start+0x14 rdtsc ls 2670177.697113434: 7f0dfdbce046 _dl_start+0x16 mov %eax, %eax ls 2670177.697113434: 7f0dfdbce048 _dl_start+0x18 shl $0x20, %rdx ls 2670177.697113434: 7f0dfdbce04c _dl_start+0x1c or %rax, %rdx ls 2670177.697114471: 7f0dfdbce04f _dl_start+0x1f movq 0x27e22(%rip), %rax IPC: 0.00 (15/1685) ls 2670177.697116177: 7f0dfdbce056 _dl_start+0x26 movq %rdx, 0x27683(%rip) IPC: 0.00 (1/881) Note, the IPC values are low due to page faults at the beginning of execution. The additional cycles are due to the time to enter the kernel, not the actual kernel page fault handler. 
Signed-off-by: Adrian Hunter Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190520113728.14389-9-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-script.txt | 5 - tools/perf/builtin-script.c | 23 ++- 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index af8282782911..c59fd52e9e91 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -117,7 +117,7 @@ OPTIONS Comma separated list of fields to print. Options are: comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff, srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output, brstackinsn, -brstackoff, callindent, insn, insnlen, synth, phys_addr, metric, misc, srccode. +brstackoff, callindent, insn, insnlen, synth, phys_addr, metric, misc, srccode, ipc. Field list can be prepended with the type, trace, sw or hw, to indicate to which event type the field list applies. e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace @@ -203,6 +203,9 @@ OPTIONS The synth field is used by synthesized events which may be created when Instruction Trace decoding. + The ipc (instructions per cycle) field is synthesized and may have a value when + Instruction Trace decoding. + Finally, a user may not set fields to none for all event types. i.e., -F "" is not allowed. 
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 3a48a2627670..80c722ade852 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -102,6 +102,7 @@ enum perf_output_field { PERF_OUTPUT_METRIC = 1U << 28, PERF_OUTPUT_MISC= 1U << 29, PERF_OUTPUT_SRCCODE = 1U << 30, + PERF_OUTPUT_IPC = 1U << 31, }; struct output_option { @@ -139,6 +140,7 @@ struct output_option { {.str = "metric", .field = PERF_OUTPUT_METRIC}, {.str = "misc", .field = PERF_OUTPUT_MISC}, {.str = "srccode", .field = PERF_OUTPUT_SRCCODE}, + {.str = "ipc", .field = PERF_OUTPUT_IPC}, }; enum { @@ -1268,6 +1270,20 @@ static int perf_sample__fprintf_insn(struct perf_sample *sample, return printed; } +static int perf_sample__fprintf_ipc(struct perf_sample *sample, + struct perf_event_attr *attr, FILE *fp) +{ + unsigned int ipc; + + if (!PRINT_FIELD(IPC) || !sample->cyc_cnt || !sample->insn_cnt) + return 0; + + ipc = (sample->insn_cnt * 100) / sample->cyc_cnt; + + return fprintf(fp, " \t IPC: %u.%02u (%" PRIu64 "/%" PRIu64 ") ", + ipc / 100, ipc % 100, sample->insn_cnt, sample->cyc_cnt); +} + static int perf_sample__fprintf_bts(struct perf_sample *sample, struct p
[tip:perf/core] perf intel-pt: Record when decoding PSB+ packets
Commit-ID: 9bc668e3bca8fadc50d5a121a1992a72ada0d3f4 Gitweb: https://git.kernel.org/tip/9bc668e3bca8fadc50d5a121a1992a72ada0d3f4 Author: Adrian Hunter AuthorDate: Mon, 20 May 2019 14:37:15 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Wed, 5 Jun 2019 09:47:56 -0300 perf intel-pt: Record when decoding PSB+ packets In preparation for using MTC packets to count cycles, record whether decoding is between PSB and PSBEND packets. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190520113728.14389-10-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../perf/util/intel-pt-decoder/intel-pt-decoder.c | 41 -- 1 file changed, 31 insertions(+), 10 deletions(-) diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index ef3a1c1cd250..a2384a314990 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -116,6 +116,7 @@ struct intel_pt_decoder { bool have_cyc; bool fixup_last_mtc; bool have_last_ip; + bool in_psb; enum intel_pt_param_flags flags; uint64_t pos; uint64_t last_ip; @@ -1549,14 +1550,17 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder) { int err; + decoder->in_psb = true; + while (1) { err = intel_pt_get_next_packet(decoder); if (err) - return err; + goto out; switch (decoder->packet.type) { case INTEL_PT_PSBEND: - return 0; + err = 0; + goto out; case INTEL_PT_TIP_PGD: case INTEL_PT_TIP_PGE: @@ -1574,10 +1578,12 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder) case INTEL_PT_PWRX: decoder->have_tma = false; intel_pt_log("ERROR: Unexpected packet\n"); - return -EAGAIN; + err = -EAGAIN; + goto out; case INTEL_PT_OVF: - return intel_pt_overflow(decoder); + err = intel_pt_overflow(decoder); + goto out; case INTEL_PT_TSC: intel_pt_calc_tsc_timestamp(decoder); @@ -1623,6 +1629,10 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder)
break; } } +out: + decoder->in_psb = false; + + return err; } static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder) @@ -1996,10 +2006,12 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder) { int err; + decoder->in_psb = true; + while (1) { err = intel_pt_get_next_packet(decoder); if (err) - return err; + goto out; switch (decoder->packet.type) { case INTEL_PT_TIP_PGD: @@ -2015,7 +2027,8 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder) case INTEL_PT_PWRE: case INTEL_PT_PWRX: intel_pt_log("ERROR: Unexpected packet\n"); - return -ENOENT; + err = -ENOENT; + goto out; case INTEL_PT_FUP: decoder->pge = true; @@ -2074,16 +2087,20 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder) decoder->pkt_state = INTEL_PT_STATE_ERR4; else decoder->pkt_state = INTEL_PT_STATE_ERR3; - return -ENOENT; + err = -ENOENT; + goto out; case INTEL_PT_BAD: /* Does not happen */ - return intel_pt_bug(decoder); + err = intel_pt_bug(decoder); + goto out; case INTEL_PT_OVF: - return intel_pt_overflow(decoder); + err = intel_pt_overflow(decoder); + goto out; case INTEL_PT_PSBEND: - return 0; + err = 0; + goto out; case INTEL_PT_PSB: case INTEL_PT_VMCS: @@ -2093,6 +2110,10 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder) break; } } +out: + decoder->in_psb = false; + + return err; } static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder)
[tip:perf/core] perf intel-pt: Accumulate cycle count from TSC/TMA/MTC packets
Commit-ID: 3f05516758bef438cef7adc47599f8b8faad7c3a Gitweb: https://git.kernel.org/tip/3f05516758bef438cef7adc47599f8b8faad7c3a Author: Adrian Hunter AuthorDate: Mon, 20 May 2019 14:37:17 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Wed, 5 Jun 2019 09:47:56 -0300 perf intel-pt: Accumulate cycle count from TSC/TMA/MTC packets When CYC packets are not available, it is still possible to count cycles using TSC/TMA/MTC timestamps. As the timestamp increments in TSC ticks, convert to CPU cycles using the current core-to-bus ratio. Do not accumulate cycles when control flow packet generation is not enabled, nor when time has been "lost", typically due to mwait, which is indicated by a TSC/TMA packet that is not part of PSB+. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190520113728.14389-12-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../perf/util/intel-pt-decoder/intel-pt-decoder.c | 51 ++ 1 file changed, 51 insertions(+) diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 99773445872d..9eb778f9c911 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -163,6 +163,9 @@ struct intel_pt_decoder { uint64_t last_masked_timestamp; uint64_t tot_cyc_cnt; uint64_t sample_tot_cyc_cnt; + uint64_t base_cyc_cnt; + uint64_t cyc_cnt_timestamp; + double tsc_to_cyc; bool continuous_period; bool overflow; bool set_fup_tx_flags; @@ -1423,6 +1426,42 @@ static int intel_pt_overflow(struct intel_pt_decoder *decoder) return -EOVERFLOW; } +static inline void intel_pt_mtc_cyc_cnt_pge(struct intel_pt_decoder *decoder) +{ + if (decoder->have_cyc) + return; + + decoder->cyc_cnt_timestamp = decoder->timestamp; + decoder->base_cyc_cnt = decoder->tot_cyc_cnt; +} + +static inline void intel_pt_mtc_cyc_cnt_cbr(struct intel_pt_decoder *decoder) +{ + decoder->tsc_to_cyc = decoder->cbr / 
decoder->max_non_turbo_ratio_fp; + + if (decoder->pge) + intel_pt_mtc_cyc_cnt_pge(decoder); +} + +static inline void intel_pt_mtc_cyc_cnt_upd(struct intel_pt_decoder *decoder) +{ + uint64_t tot_cyc_cnt, tsc_delta; + + if (decoder->have_cyc) + return; + + decoder->sample_cyc = true; + + if (!decoder->pge || decoder->timestamp <= decoder->cyc_cnt_timestamp) + return; + + tsc_delta = decoder->timestamp - decoder->cyc_cnt_timestamp; + tot_cyc_cnt = tsc_delta * decoder->tsc_to_cyc + decoder->base_cyc_cnt; + + if (tot_cyc_cnt > decoder->tot_cyc_cnt) + decoder->tot_cyc_cnt = tot_cyc_cnt; +} + static void intel_pt_calc_tma(struct intel_pt_decoder *decoder) { uint32_t ctc = decoder->packet.payload; @@ -1432,6 +1471,11 @@ static void intel_pt_calc_tma(struct intel_pt_decoder *decoder) if (!decoder->tsc_ctc_ratio_d) return; + if (decoder->pge && !decoder->in_psb) + intel_pt_mtc_cyc_cnt_pge(decoder); + else + intel_pt_mtc_cyc_cnt_upd(decoder); + decoder->last_mtc = (ctc >> decoder->mtc_shift) & 0xff; decoder->ctc_timestamp = decoder->tsc_timestamp - fc; if (decoder->tsc_ctc_mult) { @@ -1487,6 +1531,8 @@ static void intel_pt_calc_mtc_timestamp(struct intel_pt_decoder *decoder) else decoder->timestamp = timestamp; + intel_pt_mtc_cyc_cnt_upd(decoder); + decoder->timestamp_insn_cnt = 0; decoder->last_mtc = mtc; @@ -1511,6 +1557,8 @@ static void intel_pt_calc_cbr(struct intel_pt_decoder *decoder) decoder->cbr = cbr; decoder->cbr_cyc_to_tsc = decoder->max_non_turbo_ratio_fp / cbr; + + intel_pt_mtc_cyc_cnt_cbr(decoder); } static void intel_pt_calc_cyc_timestamp(struct intel_pt_decoder *decoder) @@ -1706,6 +1754,7 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder) decoder->state.to_ip = decoder->ip; } decoder->state.type |= INTEL_PT_TRACE_BEGIN; + intel_pt_mtc_cyc_cnt_pge(decoder); return 0; case INTEL_PT_TIP: @@ -1776,6 +1825,7 @@ next: case INTEL_PT_TIP_PGE: { decoder->pge = true; + intel_pt_mtc_cyc_cnt_pge(decoder); if (decoder->packet.count == 0) { 
intel_pt_log_at("Skipping zero TIP.PGE", decoder->pos); @@ -2138,6 +2188,7 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder) case INTEL_PT_TIP_PGE: decoder->pge = true; + intel_pt_mtc_
[tip:perf/core] perf intel-pt: Add support for samples to contain IPC ratio
Commit-ID: 5b1dc0fd1da06d6e89f1ca8736cfe0ee84e34cc7 Gitweb: https://git.kernel.org/tip/5b1dc0fd1da06d6e89f1ca8736cfe0ee84e34cc7 Author: Adrian Hunter AuthorDate: Mon, 20 May 2019 14:37:13 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Wed, 5 Jun 2019 09:47:56 -0300 perf intel-pt: Add support for samples to contain IPC ratio Copy the incremental instruction count and cycle count onto 'instructions' and 'branches' samples. Because Intel PT does not update the cycle count on every branch or instruction, the incremental values will often be zero. When there are values, they will be the number of instructions and number of cycles since the last update, and thus represent the average IPC since the last IPC value. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190520113728.14389-8-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/intel-pt.c | 29 + 1 file changed, 29 insertions(+) diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 7a70693c1b91..3cff8fe2eaa0 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -157,6 +157,12 @@ struct intel_pt_queue { u32 flags; u16 insn_len; u64 last_insn_cnt; + u64 ipc_insn_cnt; + u64 ipc_cyc_cnt; + u64 last_in_insn_cnt; + u64 last_in_cyc_cnt; + u64 last_br_insn_cnt; + u64 last_br_cyc_cnt; char insn[INTEL_PT_INSN_BUF_SZ]; }; @@ -1162,6 +1168,13 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) sample.branch_stack = (struct branch_stack *)&dummy_bs; } + sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_br_cyc_cnt; + if (sample.cyc_cnt) { + sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_br_insn_cnt; + ptq->last_br_insn_cnt = ptq->ipc_insn_cnt; + ptq->last_br_cyc_cnt = ptq->ipc_cyc_cnt; + } + return intel_pt_deliver_synth_b_event(pt, event, &sample, pt->branches_sample_type); } @@ -1217,6 +1230,13 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq) sample.stream_id = 
ptq->pt->instructions_id; sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt; + sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_in_cyc_cnt; + if (sample.cyc_cnt) { + sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_in_insn_cnt; + ptq->last_in_insn_cnt = ptq->ipc_insn_cnt; + ptq->last_in_cyc_cnt = ptq->ipc_cyc_cnt; + } + ptq->last_insn_cnt = ptq->state->tot_insn_cnt; return intel_pt_deliver_synth_event(pt, ptq, event, &sample, @@ -1488,6 +1508,15 @@ static int intel_pt_sample(struct intel_pt_queue *ptq) ptq->have_sample = false; + if (ptq->state->tot_cyc_cnt > ptq->ipc_cyc_cnt) { + /* +* Cycle count and instruction count only go together to create +* a valid IPC ratio when the cycle count changes. +*/ + ptq->ipc_insn_cnt = ptq->state->tot_insn_cnt; + ptq->ipc_cyc_cnt = ptq->state->tot_cyc_cnt; + } + if (pt->sample_pwr_events && (state->type & INTEL_PT_PWR_EVT)) { if (state->type & INTEL_PT_CBR_CHG) { err = intel_pt_synth_cbr_sample(ptq);
[tip:perf/core] perf tools: Add IPC information to perf_sample
Commit-ID: 61d276f428a11f0e4ce5203462fa488e6570684f Gitweb: https://git.kernel.org/tip/61d276f428a11f0e4ce5203462fa488e6570684f Author: Adrian Hunter AuthorDate: Mon, 20 May 2019 14:37:12 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Wed, 5 Jun 2019 09:47:55 -0300 perf tools: Add IPC information to perf_sample Add counts of instructions and cycles, in order to represent instructions-per-cycle (IPC). Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190520113728.14389-7-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/event.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 9e999550f247..1f1da6082806 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -204,6 +204,8 @@ struct perf_sample { u64 period; u64 weight; u64 transaction; + u64 insn_cnt; + u64 cyc_cnt; u32 cpu; u32 raw_size; u64 data_src;
[tip:perf/core] perf intel-pt: Accumulate cycle count from CYC packets
Commit-ID: 7b4b4f83881e11b1fe5d8743953f81addb0871de Gitweb: https://git.kernel.org/tip/7b4b4f83881e11b1fe5d8743953f81addb0871de Author: Adrian Hunter AuthorDate: Mon, 20 May 2019 14:37:11 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Wed, 5 Jun 2019 09:47:54 -0300 perf intel-pt: Accumulate cycle count from CYC packets In preparation for providing instructions-per-cycle (IPC) information, accumulate cycle count from CYC packets. Although CYC packets are optional (requires config term 'cyc' to enable cycle-accurate mode when recording), the simplest way to count cycles is with CYC packets. The first complication is that cycles must be counted only when also counting instructions. That means when control flow packet generation is enabled i.e. between TIP.PGE and TIP.PGD packets. Also, sampling the cycle count follows the same rules as sampling the timestamp, that is, not before the instruction to which the decoder is walking is reached. In addition, the cycle count is not accurate for any but the first branch of a TNT packet. 
Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190520113728.14389-6-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/intel-pt-decoder/intel-pt-decoder.c | 14 +- tools/perf/util/intel-pt-decoder/intel-pt-decoder.h | 1 + 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 1ab4070b5633..ef3a1c1cd250 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -160,6 +160,8 @@ struct intel_pt_decoder { uint64_t period_mask; uint64_t period_ticks; uint64_t last_masked_timestamp; + uint64_t tot_cyc_cnt; + uint64_t sample_tot_cyc_cnt; bool continuous_period; bool overflow; bool set_fup_tx_flags; @@ -167,6 +169,7 @@ struct intel_pt_decoder { bool set_fup_mwait; bool set_fup_pwre; bool set_fup_exstop; + bool sample_cyc; unsigned int fup_tx_flags; unsigned int tx_flags; uint64_t fup_ptw_payload; @@ -1323,6 +1326,7 @@ static int intel_pt_walk_tnt(struct intel_pt_decoder *decoder) decoder->ip += intel_pt_insn.length; return 0; } + decoder->sample_cyc = false; decoder->ip += intel_pt_insn.length; if (!decoder->tnt.count) { intel_pt_update_sample_time(decoder); @@ -1515,6 +1519,9 @@ static void intel_pt_calc_cyc_timestamp(struct intel_pt_decoder *decoder) decoder->have_cyc = true; decoder->cycle_cnt += decoder->packet.payload; + if (decoder->pge) + decoder->tot_cyc_cnt += decoder->packet.payload; + decoder->sample_cyc = true; if (!decoder->cyc_ref_timestamp) return; @@ -2419,6 +2426,7 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder) decoder->state.err = intel_pt_ext_err(err); decoder->state.from_ip = decoder->ip; intel_pt_update_sample_time(decoder); + decoder->sample_tot_cyc_cnt = decoder->tot_cyc_cnt; } else { decoder->state.err = 0; if (decoder->cbr != decoder->cbr_seen && decoder->state.type) { 
@@ -2426,14 +2434,18 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder) decoder->state.type |= INTEL_PT_CBR_CHG; decoder->state.cbr_payload = decoder->cbr_payload; } - if (intel_pt_sample_time(decoder->pkt_state)) + if (intel_pt_sample_time(decoder->pkt_state)) { intel_pt_update_sample_time(decoder); + if (decoder->sample_cyc) + decoder->sample_tot_cyc_cnt = decoder->tot_cyc_cnt; + } } decoder->state.timestamp = decoder->sample_timestamp; decoder->state.est_timestamp = intel_pt_est_timestamp(decoder); decoder->state.cr3 = decoder->cr3; decoder->state.tot_insn_cnt = decoder->tot_insn_cnt; + decoder->state.tot_cyc_cnt = decoder->sample_tot_cyc_cnt; return &decoder->state; } diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h index ed088d4726ba..6a61773dc44b 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h @@ -77,6 +77,7 @@ struct intel_pt_state { uint64_t to_ip; uint64_t cr3; uint64_t tot_insn_cnt; + uint64_t tot_cyc_cnt; uint64_t timestamp; uint64_t est_times
[tip:perf/core] perf intel-pt: Factor out intel_pt_update_sample_time
Commit-ID: 948e9dc8bb266649a618ac974010292bf36fb213 Gitweb: https://git.kernel.org/tip/948e9dc8bb266649a618ac974010292bf36fb213 Author: Adrian Hunter AuthorDate: Mon, 20 May 2019 14:37:10 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Wed, 5 Jun 2019 09:47:54 -0300 perf intel-pt: Factor out intel_pt_update_sample_time To eliminate some duplication and make the code more understandable, factor out intel_pt_update_sample_time. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190520113728.14389-5-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/intel-pt-decoder/intel-pt-decoder.c | 18 ++ 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index f4c3c84b090f..1ab4070b5633 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -479,6 +479,12 @@ static int intel_pt_bad_packet(struct intel_pt_decoder *decoder) return -EBADMSG; } +static inline void intel_pt_update_sample_time(struct intel_pt_decoder *decoder) +{ + decoder->sample_timestamp = decoder->timestamp; + decoder->sample_insn_cnt = decoder->timestamp_insn_cnt; +} + static int intel_pt_get_data(struct intel_pt_decoder *decoder) { struct intel_pt_buffer buffer = { .buf = 0, }; @@ -1319,8 +1325,7 @@ static int intel_pt_walk_tnt(struct intel_pt_decoder *decoder) } decoder->ip += intel_pt_insn.length; if (!decoder->tnt.count) { - decoder->sample_timestamp = decoder->timestamp; - decoder->sample_insn_cnt = decoder->timestamp_insn_cnt; + intel_pt_update_sample_time(decoder); return -EAGAIN; } decoder->tnt.payload <<= 1; @@ -2413,8 +2418,7 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder) if (err) { decoder->state.err = intel_pt_ext_err(err); decoder->state.from_ip = decoder->ip; - decoder->sample_timestamp = decoder->timestamp; - 
decoder->sample_insn_cnt = decoder->timestamp_insn_cnt; + intel_pt_update_sample_time(decoder); } else { decoder->state.err = 0; if (decoder->cbr != decoder->cbr_seen && decoder->state.type) { @@ -2422,10 +2426,8 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder) decoder->state.type |= INTEL_PT_CBR_CHG; decoder->state.cbr_payload = decoder->cbr_payload; } - if (intel_pt_sample_time(decoder->pkt_state)) { - decoder->sample_timestamp = decoder->timestamp; - decoder->sample_insn_cnt = decoder->timestamp_insn_cnt; - } + if (intel_pt_sample_time(decoder->pkt_state)) + intel_pt_update_sample_time(decoder); } decoder->state.timestamp = decoder->sample_timestamp;
[tip:perf/core] perf intel-pt: Rationalize intel_pt_sync_switch()'s use of next_tid
Commit-ID: 14f1cfd4f7b4794e2f9d2ae214bcf049654b0b5c Gitweb: https://git.kernel.org/tip/14f1cfd4f7b4794e2f9d2ae214bcf049654b0b5c Author: Adrian Hunter AuthorDate: Fri, 12 Apr 2019 14:38:30 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Tue, 28 May 2019 18:37:45 -0300 perf intel-pt: Rationalize intel_pt_sync_switch()'s use of next_tid Returning 1 from intel_pt_sync_switch() causes the current tid to be set. That negates the need to keep next_tid anymore. Rationalize the code to that effect. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190412113830.4126-9-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/intel-pt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 6aaba1146fc8..7a70693c1b91 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -1859,7 +1859,6 @@ static int intel_pt_sync_switch(struct intel_pt *pt, int cpu, pid_t tid, switch (ptq->switch_state) { case INTEL_PT_SS_NOT_TRACING: - ptq->next_tid = -1; break; case INTEL_PT_SS_UNKNOWN: case INTEL_PT_SS_TRACING: @@ -1879,13 +1878,14 @@ static int intel_pt_sync_switch(struct intel_pt *pt, int cpu, pid_t tid, ptq->switch_state = INTEL_PT_SS_TRACING; break; case INTEL_PT_SS_EXPECTING_SWITCH_IP: - ptq->next_tid = tid; intel_pt_log("ERROR: cpu %d expecting switch ip\n", cpu); break; default: break; } + ptq->next_tid = -1; + return 1; }
[tip:perf/core] perf intel-pt: Improve sync_switch by processing PERF_RECORD_SWITCH* in events
Commit-ID: c7b4f15ff79b539fed4c382e52e988548081bc9d Gitweb: https://git.kernel.org/tip/c7b4f15ff79b539fed4c382e52e988548081bc9d Author: Adrian Hunter AuthorDate: Fri, 12 Apr 2019 14:38:29 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Tue, 28 May 2019 18:37:45 -0300 perf intel-pt: Improve sync_switch by processing PERF_RECORD_SWITCH* in events sync_switch is a facility to synchronize decoding more closely with the point in the kernel when the context actually switched. Improve it by processing "context switch in" events. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190412113830.4126-8-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/intel-pt.c | 40 +++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 03b1da6d1da4..6aaba1146fc8 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -1914,6 +1914,44 @@ static int intel_pt_process_switch(struct intel_pt *pt, return machine__set_current_tid(pt->machine, cpu, -1, tid); } +static int intel_pt_context_switch_in(struct intel_pt *pt, + struct perf_sample *sample) +{ + pid_t pid = sample->pid; + pid_t tid = sample->tid; + int cpu = sample->cpu; + + if (pt->sync_switch) { + struct intel_pt_queue *ptq; + + ptq = intel_pt_cpu_to_ptq(pt, cpu); + if (ptq && ptq->sync_switch) { + ptq->next_tid = -1; + switch (ptq->switch_state) { + case INTEL_PT_SS_NOT_TRACING: + case INTEL_PT_SS_UNKNOWN: + case INTEL_PT_SS_TRACING: + break; + case INTEL_PT_SS_EXPECTING_SWITCH_EVENT: + case INTEL_PT_SS_EXPECTING_SWITCH_IP: + ptq->switch_state = INTEL_PT_SS_TRACING; + break; + default: + break; + } + } + } + + /* +* If the current tid has not been updated yet, ensure it is now that +* a "switch in" event has occurred. 
+*/ + if (machine__get_current_tid(pt->machine, cpu) == tid) + return 0; + + return machine__set_current_tid(pt->machine, cpu, pid, tid); +} + static int intel_pt_context_switch(struct intel_pt *pt, union perf_event *event, struct perf_sample *sample) { @@ -1925,7 +1963,7 @@ static int intel_pt_context_switch(struct intel_pt *pt, union perf_event *event, if (pt->have_sched_switch == 3) { if (!out) - return 0; + return intel_pt_context_switch_in(pt, sample); if (event->header.type != PERF_RECORD_SWITCH_CPU_WIDE) { pr_err("Expecting CPU-wide context switch event\n"); return -EINVAL;
[tip:perf/core] perf scripts python: export-to-postgresql.py: Add support for pyside2
Commit-ID: 3cd3216dbb421244b96b992f193e778a3baa2220 Gitweb: https://git.kernel.org/tip/3cd3216dbb421244b96b992f193e778a3baa2220 Author: Adrian Hunter AuthorDate: Fri, 12 Apr 2019 14:38:27 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Tue, 28 May 2019 18:37:45 -0300 perf scripts python: export-to-postgresql.py: Add support for pyside2 pyside2 is the future for pyside support. Note pyside uses Qt4 whereas pyside2 uses Qt5. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190412113830.4126-6-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/export-to-postgresql.py | 43 ++- 1 file changed, 34 insertions(+), 9 deletions(-) diff --git a/tools/perf/scripts/python/export-to-postgresql.py b/tools/perf/scripts/python/export-to-postgresql.py index c3eae1d77d36..b2f481b0d28d 100644 --- a/tools/perf/scripts/python/export-to-postgresql.py +++ b/tools/perf/scripts/python/export-to-postgresql.py @@ -27,18 +27,31 @@ import datetime # # fedora: # -# $ sudo yum install postgresql postgresql-server python-pyside qt-postgresql +# $ sudo yum install postgresql postgresql-server qt-postgresql # $ sudo su - postgres -c initdb # $ sudo service postgresql start # $ sudo su - postgres -# $ createuser +# $ createuser -s # Older versions may not support -s, in which case answer the prompt below: # Shall the new role be a superuser? 
(y/n) y +# $ sudo yum install python-pyside +# +# Alternately, to use Python3 and/or pyside 2, one of the following: +# $ sudo yum install python3-pyside +# $ pip install --user PySide2 +# $ pip3 install --user PySide2 # # ubuntu: # -# $ sudo apt-get install postgresql python-pyside.qtsql libqt4-sql-psql +# $ sudo apt-get install postgresql # $ sudo su - postgres # $ createuser -s +# $ sudo apt-get install python-pyside.qtsql libqt4-sql-psql +# +# Alternately, to use Python3 and/or pyside 2, one of the following: +# +# $ sudo apt-get install python3-pyside.qtsql libqt4-sql-psql +# $ sudo apt-get install python-pyside2.qtsql libqt5sql5-psql +# $ sudo apt-get install python3-pyside2.qtsql libqt5sql5-psql # # An example of using this script with Intel PT: # @@ -199,7 +212,16 @@ import datetime # print "{0:>6} {1:>10} {2:>9} {3:<30} {4:>6} {5:<30}".format(query.value(0), query.value(1), query.value(2), query.value(3), query.value(4), query.value(5)) # call_path_id = query.value(6) -from PySide.QtSql import * +pyside_version_1 = True +if not "pyside-version-1" in sys.argv: + try: + from PySide2.QtSql import * + pyside_version_1 = False + except: + pass + +if pyside_version_1: + from PySide.QtSql import * if sys.version_info < (3, 0): def toserverstr(str): @@ -255,11 +277,12 @@ def printdate(*args, **kw_args): print(datetime.datetime.today(), *args, sep=' ', **kw_args) def usage(): - printerr("Usage is: export-to-postgresql.py [] [] []") - printerr("where:columns 'all' or 'branches'") - printerr(" calls 'calls' => create calls and call_paths table") - printerr(" callchains 'callchains' => create call_paths table") - raise Exception("Too few arguments") + printerr("Usage is: export-to-postgresql.py [] [] [] []"); + printerr("where: columns'all' or 'branches'"); + printerr("calls 'calls' => create calls and call_paths table"); + printerr("callchains 'callchains' => create call_paths table"); + printerr("pyside-version-1 'pyside-version-1' => use pyside version 1"); + raise 
Exception("Too few or bad arguments") if (len(sys.argv) < 2): usage() @@ -281,6 +304,8 @@ for i in range(3,len(sys.argv)): perf_db_export_calls = True elif (sys.argv[i] == "callchains"): perf_db_export_callchains = True + elif (sys.argv[i] == "pyside-version-1"): + pass else: usage()
[tip:perf/core] perf scripts python: export-to-sqlite.py: Add support for pyside2
Commit-ID: bfb3170e2481b76a4f8aae94176e45d681a37f3e Gitweb: https://git.kernel.org/tip/bfb3170e2481b76a4f8aae94176e45d681a37f3e Author: Adrian Hunter AuthorDate: Fri, 12 Apr 2019 14:38:26 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Tue, 28 May 2019 18:37:45 -0300 perf scripts python: export-to-sqlite.py: Add support for pyside2 pyside2 is the future for pyside support. Note pyside uses Qt4 whereas pyside2 uses Qt5. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190412113830.4126-5-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/export-to-sqlite.py | 44 +++ 1 file changed, 38 insertions(+), 6 deletions(-) diff --git a/tools/perf/scripts/python/export-to-sqlite.py b/tools/perf/scripts/python/export-to-sqlite.py index bf271fbc3a88..f617e518332f 100644 --- a/tools/perf/scripts/python/export-to-sqlite.py +++ b/tools/perf/scripts/python/export-to-sqlite.py @@ -21,6 +21,26 @@ import datetime # provides LGPL-licensed Python bindings for Qt. You will also need the package # libqt4-sql-sqlite for Qt sqlite3 support. # +# Examples of installing pyside: +# +# ubuntu: +# +# $ sudo apt-get install python-pyside.qtsql libqt4-sql-psql +# +# Alternately, to use Python3 and/or pyside 2, one of the following: +# +# $ sudo apt-get install python3-pyside.qtsql libqt4-sql-psql +# $ sudo apt-get install python-pyside2.qtsql libqt5sql5-psql +# $ sudo apt-get install python3-pyside2.qtsql libqt5sql5-psql +# fedora: +# +# $ sudo yum install python-pyside +# +# Alternately, to use Python3 and/or pyside 2, one of the following: +# $ sudo yum install python3-pyside +# $ pip install --user PySide2 +# $ pip3 install --user PySide2 +# # An example of using this script with Intel PT: # # $ perf record -e intel_pt//u ls @@ -49,7 +69,16 @@ import datetime # difference is the 'transaction' column of the 'samples' table which is # renamed 'transaction_' in sqlite because 'transaction' is a reserved word. 
-from PySide.QtSql import * +pyside_version_1 = True +if not "pyside-version-1" in sys.argv: + try: + from PySide2.QtSql import * + pyside_version_1 = False + except: + pass + +if pyside_version_1: + from PySide.QtSql import * sys.path.append(os.environ['PERF_EXEC_PATH'] + \ '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') @@ -69,11 +98,12 @@ def printdate(*args, **kw_args): print(datetime.datetime.today(), *args, sep=' ', **kw_args) def usage(): - printerr("Usage is: export-to-sqlite.py [] [] []"); - printerr("where:columns 'all' or 'branches'"); - printerr(" calls 'calls' => create calls and call_paths table"); - printerr(" callchains 'callchains' => create call_paths table"); - raise Exception("Too few arguments") + printerr("Usage is: export-to-sqlite.py [] [] [] []"); + printerr("where: columns'all' or 'branches'"); + printerr("calls 'calls' => create calls and call_paths table"); + printerr("callchains 'callchains' => create call_paths table"); + printerr("pyside-version-1 'pyside-version-1' => use pyside version 1"); + raise Exception("Too few or bad arguments") if (len(sys.argv) < 2): usage() @@ -95,6 +125,8 @@ for i in range(3,len(sys.argv)): perf_db_export_calls = True elif (sys.argv[i] == "callchains"): perf_db_export_callchains = True + elif (sys.argv[i] == "pyside-version-1"): + pass else: usage()
[tip:perf/core] perf scripts python: exported-sql-viewer.py: Add support for pyside2
Commit-ID: df8ea22a8fd9e4e8502f4fa917622801e1b4d09e Gitweb: https://git.kernel.org/tip/df8ea22a8fd9e4e8502f4fa917622801e1b4d09e Author: Adrian Hunter AuthorDate: Fri, 12 Apr 2019 14:38:25 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Tue, 28 May 2019 18:37:45 -0300 perf scripts python: exported-sql-viewer.py: Add support for pyside2 pyside2 is the future for pyside support. Note pyside uses Qt4 whereas pyside2 uses Qt5. Committer testing: On a system with just: # rpm -qa| grep -i pyside python2-pyside-1.2.4-7.fc29.x86_64 # Running: $ python ~acme/libexec/perf-core/scripts/python/exported-sql-viewer.py ~/c/adrian.hunter/simple-retpoline.db & [1] 7438 Makes it use the pyside 1 files: $ grep -i pyside /proc/7438/maps | cut -d ' ' -f 6- | sort -u /usr/lib64/libpyside-python2.7.so.1.2.4 /usr/lib64/python2.7/site-packages/PySide/QtCore.so /usr/lib64/python2.7/site-packages/PySide/QtGui.so /usr/lib64/python2.7/site-packages/PySide/QtSql.so $ rpm -qf /usr/lib64/libpyside-python2.7.so.1.2.4 python2-pyside-1.2.4-7.fc29.x86_64 $ To get PySide2 I guess one needs to do: $ pip install PySide2 But that's a 142MiB download I can't do right now, perhaps before pushing upstream... 
Signed-off-by: Adrian Hunter Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190412113830.4126-4-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/exported-sql-viewer.py | 28 ++-- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py index 498b79454012..6fe553258ce5 100755 --- a/tools/perf/scripts/python/exported-sql-viewer.py +++ b/tools/perf/scripts/python/exported-sql-viewer.py @@ -105,10 +105,23 @@ except ImportError: glb_nsz = 16 import re import os -from PySide.QtCore import * -from PySide.QtGui import * -from PySide.QtSql import * + pyside_version_1 = True +if not "--pyside-version-1" in sys.argv: + try: + from PySide2.QtCore import * + from PySide2.QtGui import * + from PySide2.QtSql import * + from PySide2.QtWidgets import * + pyside_version_1 = False + except: + pass + +if pyside_version_1: + from PySide.QtCore import * + from PySide.QtGui import * + from PySide.QtSql import * + from decimal import * from ctypes import * from multiprocessing import Process, Array, Value, Event @@ -2755,7 +2768,7 @@ class WindowMenu(): action = self.window_menu.addAction(label) action.setCheckable(True) action.setChecked(sub_window == self.mdi_area.activeSubWindow()) - action.triggered.connect(lambda x=nr: self.setActiveSubWindow(x)) + action.triggered.connect(lambda a=None,x=nr: self.setActiveSubWindow(x)) self.window_menu.addAction(action) nr += 1 @@ -3115,14 +3128,14 @@ class MainWindow(QMainWindow): event = event.split(":")[0] if event == "branches": label = "All branches" if branches_events == 1 else "All branches " + "(id=" + dbid + ")" - reports_menu.addAction(CreateAction(label, "Create a new window displaying branch events", lambda x=dbid: self.NewBranchView(x), self)) + reports_menu.addAction(CreateAction(label, "Create a new window displaying branch events", lambda 
a=None,x=dbid: self.NewBranchView(x), self)) label = "Selected branches" if branches_events == 1 else "Selected branches " + "(id=" + dbid + ")" - reports_menu.addAction(CreateAction(label, "Create a new window displaying branch events", lambda x=dbid: self.NewSelectedBranchView(x), self)) + reports_menu.addAction(CreateAction(label, "Create a new window displaying branch events", lambda a=None,x=dbid: self.NewSelectedBranchView(x), self)) def TableMenu(self, tables, menu): table_menu = menu.addMenu("&Tables") for table in tables: - table_menu.addAction(CreateAction(table, "Create a new window containing a table view", lambda t=table: self.NewTableView(t), self)) + table_menu.addAction(CreateAction(table, "Create a new window containing a table view", lambda a=None,t=table: self.NewTableView(t), self)) def NewCallGraph(self): CallGraphWindow(self.glb, self) @@ -3365,6 +3378,7 @@ def Main(): usage_str = "exported-sql-viewer.py [--pyside-version-1] \n" \ " or: exported-sql-viewer.py --help-only" ap = argparse.Argumen
[tip:perf/core] perf scripts python: exported-sql-viewer.py: Use argparse module for argument parsing
Commit-ID: 1ed7f47fd3af3c09d2cd64d1aff1c5b96d238111 Gitweb: https://git.kernel.org/tip/1ed7f47fd3af3c09d2cd64d1aff1c5b96d238111 Author: Adrian Hunter AuthorDate: Fri, 12 Apr 2019 14:38:24 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Tue, 28 May 2019 18:37:45 -0300 perf scripts python: exported-sql-viewer.py: Use argparse module for argument parsing The argparse module makes it easier to add new arguments. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190412113830.4126-3-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/exported-sql-viewer.py | 21 +++-- 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py index 9ff92a130655..498b79454012 100755 --- a/tools/perf/scripts/python/exported-sql-viewer.py +++ b/tools/perf/scripts/python/exported-sql-viewer.py @@ -91,6 +91,7 @@ from __future__ import print_function import sys +import argparse import weakref import threading import string @@ -3361,18 +3362,26 @@ class DBRef(): # Main def Main(): - if (len(sys.argv) < 2): - printerr("Usage is: exported-sql-viewer.py { | --help-only}"); - raise Exception("Too few arguments") - - dbname = sys.argv[1] - if dbname == "--help-only": + usage_str = "exported-sql-viewer.py [--pyside-version-1] \n" \ + " or: exported-sql-viewer.py --help-only" + ap = argparse.ArgumentParser(usage = usage_str, add_help = False) + ap.add_argument("dbname", nargs="?") + ap.add_argument("--help-only", action='store_true') + args = ap.parse_args() + + if args.help_only: app = QApplication(sys.argv) mainwindow = HelpOnlyWindow() mainwindow.show() err = app.exec_() sys.exit(err) + dbname = args.dbname + if dbname is None: + ap.print_usage() + print("Too few arguments") + sys.exit(1) + is_sqlite3 = False try: f = open(dbname, "rb")
[tip:perf/core] perf scripts python: exported-sql-viewer.py: Change python2 to python
Commit-ID: c6aba1bf258ff1ce201f112dafe1bdde601573dd Gitweb: https://git.kernel.org/tip/c6aba1bf258ff1ce201f112dafe1bdde601573dd Author: Adrian Hunter AuthorDate: Fri, 12 Apr 2019 14:38:23 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Tue, 28 May 2019 18:37:45 -0300 perf scripts python: exported-sql-viewer.py: Change python2 to python Now that there is also support for python3, there is no need to specify python2 explicitly. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190412113830.4126-2-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/exported-sql-viewer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py index affed7d149be..9ff92a130655 100755 --- a/tools/perf/scripts/python/exported-sql-viewer.py +++ b/tools/perf/scripts/python/exported-sql-viewer.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python # SPDX-License-Identifier: GPL-2.0 # exported-sql-viewer.py: view data from sql database # Copyright (c) 2014-2018, Intel Corporation.
[tip:perf/core] perf intel-pt: Fix itrace defaults for perf script intel-pt documentation
Commit-ID: a2d8a1585e35444789c1c8cf7e2e51fb15589880 Gitweb: https://git.kernel.org/tip/a2d8a1585e35444789c1c8cf7e2e51fb15589880 Author: Adrian Hunter AuthorDate: Mon, 20 May 2019 14:37:09 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Tue, 28 May 2019 18:37:42 -0300 perf intel-pt: Fix itrace defaults for perf script intel-pt documentation Fix intel-pt documentation to reflect the change of itrace defaults for perf script. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Cc: sta...@vger.kernel.org Fixes: 4eb068157121 ("perf script: Make itrace script default to all calls") Link: http://lkml.kernel.org/r/20190520113728.14389-4-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/intel-pt.txt | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt index 115eaacc455f..60d99e5e7921 100644 --- a/tools/perf/Documentation/intel-pt.txt +++ b/tools/perf/Documentation/intel-pt.txt @@ -88,16 +88,16 @@ smaller. To represent software control flow, "branches" samples are produced. By default a branch sample is synthesized for every single branch. To get an idea what -data is available you can use the 'perf script' tool with no parameters, which -will list all the samples. +data is available you can use the 'perf script' tool with all itrace sampling +options, which will list all the samples. 
perf record -e intel_pt//u ls - perf script + perf script --itrace=ibxwpe An interesting field that is not printed by default is 'flags' which can be displayed as follows: - perf script -Fcomm,tid,pid,time,cpu,event,trace,ip,sym,dso,addr,symoff,flags + perf script --itrace=ibxwpe -F+flags The flags are "bcrosyiABEx" which stand for branch, call, return, conditional, system, asynchronous, interrupt, transaction abort, trace begin, trace end, and @@ -713,7 +713,7 @@ Having no option is the same as which, in turn, is the same as - --itrace=ibxwpe + --itrace=cepwx The letters are:
[tip:perf/core] perf auxtrace: Fix itrace defaults for perf script
Commit-ID: 355200e0f6a9ce14771625014aa469f5ecbd8977 Gitweb: https://git.kernel.org/tip/355200e0f6a9ce14771625014aa469f5ecbd8977 Author: Adrian Hunter AuthorDate: Mon, 20 May 2019 14:37:08 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Tue, 28 May 2019 18:37:42 -0300 perf auxtrace: Fix itrace defaults for perf script Commit 4eb068157121 ("perf script: Make itrace script default to all calls") does not work for the case when '--itrace' only is used, because default_no_sample is not being passed. Example: Before: $ perf record -e intel_pt/cyc/u ls $ perf script --itrace > cmp1.txt $ perf script --itrace=cepwx > cmp2.txt $ diff -sq cmp1.txt cmp2.txt Files cmp1.txt and cmp2.txt differ After: $ perf script --itrace > cmp1.txt $ perf script --itrace=cepwx > cmp2.txt $ diff -sq cmp1.txt cmp2.txt Files cmp1.txt and cmp2.txt are identical Signed-off-by: Adrian Hunter Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: sta...@vger.kernel.org Fixes: 4eb068157121 ("perf script: Make itrace script default to all calls") Link: http://lkml.kernel.org/r/20190520113728.14389-3-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/auxtrace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index fb76b6b232d4..5dd9d1893b89 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -1010,7 +1010,8 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str, } if (!str) { - itrace_synth_opts__set_default(synth_opts, false); + itrace_synth_opts__set_default(synth_opts, + synth_opts->default_no_sample); return 0; }
[tip:perf/core] perf intel-pt: Fix itrace defaults for perf script
Commit-ID: 26f19c2eb7e54015564ff133b91983a74e84541b Gitweb: https://git.kernel.org/tip/26f19c2eb7e54015564ff133b91983a74e84541b Author: Adrian Hunter AuthorDate: Mon, 20 May 2019 14:37:07 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Tue, 28 May 2019 18:37:42 -0300 perf intel-pt: Fix itrace defaults for perf script Commit 4eb068157121 ("perf script: Make itrace script default to all calls") does not work because 'use_browser' is being used to determine whether to default to periodic sampling (i.e. better for perf report). The result is that nothing but CBR events display for perf script when no --itrace option is specified. Fix by using 'default_no_sample' and 'inject' instead. Example: Before: $ perf record -e intel_pt/cyc/u ls $ perf script > cmp1.txt $ perf script --itrace=cepwx > cmp2.txt $ diff -sq cmp1.txt cmp2.txt Files cmp1.txt and cmp2.txt differ After: $ perf script > cmp1.txt $ perf script --itrace=cepwx > cmp2.txt $ diff -sq cmp1.txt cmp2.txt Files cmp1.txt and cmp2.txt are identical Signed-off-by: Adrian Hunter Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: sta...@vger.kernel.org # v4.20+ Fixes: 90e457f7be08 ("perf tools: Add Intel PT support") Link: http://lkml.kernel.org/r/20190520113728.14389-2-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/intel-pt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 6d288237887b..03b1da6d1da4 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -2588,7 +2588,8 @@ int intel_pt_process_auxtrace_info(union perf_event *event, } else { itrace_synth_opts__set_default(&pt->synth_opts, session->itrace_synth_opts->default_no_sample); - if (use_browser != -1) { + if (!session->itrace_synth_opts->default_no_sample && + !session->itrace_synth_opts->inject) { pt->synth_opts.branches = false; pt->synth_opts.callchain = true; }
[tip:perf/core] perf-with-kcore.sh: Always allow fix_buildid_cache_permissions
Commit-ID: a685c7a4a25c80f1f022b55830f2d894ee8847eb Gitweb: https://git.kernel.org/tip/a685c7a4a25c80f1f022b55830f2d894ee8847eb Author: Adrian Hunter AuthorDate: Fri, 12 Apr 2019 14:38:28 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Tue, 28 May 2019 18:37:42 -0300 perf-with-kcore.sh: Always allow fix_buildid_cache_permissions The user's buildid cache may contain entries added by root even if root has its own home directory (e.g. by using perfconfig to specify the same buildid dir), so remove that validation. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190412113830.4126-7-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/perf-with-kcore.sh | 5 - 1 file changed, 5 deletions(-) diff --git a/tools/perf/perf-with-kcore.sh b/tools/perf/perf-with-kcore.sh index 7e47a7cbc195..2ad2fffdb209 100644 --- a/tools/perf/perf-with-kcore.sh +++ b/tools/perf/perf-with-kcore.sh @@ -111,11 +111,6 @@ fix_buildid_cache_permissions() USER_HOME=$(bash <<< "echo ~$SUDO_USER") - if [ "$HOME" != "$USER_HOME" ] ; then - echo "Fix unnecessary because root has a home: $HOME" >&2 - exit 1 - fi - echo "Fixing buildid cache permissions" find "$USER_HOME/.debug" -xdev -type d ! -user "$SUDO_USER" -ls -exec chown"$SUDO_USER" \{\} \;
[tip:perf/core] perf intel-pt: Fix sample timestamp wrt non-taken branches
Commit-ID: 1b6599a9d8e6c9f7e9b0476012383b1777f7fc93 Gitweb: https://git.kernel.org/tip/1b6599a9d8e6c9f7e9b0476012383b1777f7fc93 Author: Adrian Hunter AuthorDate: Fri, 10 May 2019 15:41:43 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Thu, 16 May 2019 14:17:24 -0300 perf intel-pt: Fix sample timestamp wrt non-taken branches The sample timestamp is updated to ensure that the timestamp represents the time of the sample and not a branch that the decoder is still walking towards. The sample timestamp is updated when the decoder returns, but the decoder does not return for non-taken branches. Update the sample timestamp then also. Note that commit 3f04d98e972b5 ("perf intel-pt: Improve sample timestamp") was also a stable fix and appears, for example, in v4.4 stable tree as commit a4ebb58fd124 ("perf intel-pt: Improve sample timestamp"). Signed-off-by: Adrian Hunter Cc: Jiri Olsa Cc: sta...@vger.kernel.org # v4.4+ Fixes: 3f04d98e972b ("perf intel-pt: Improve sample timestamp") Link: http://lkml.kernel.org/r/20190510124143.27054-4-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/intel-pt-decoder/intel-pt-decoder.c | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 9cbd587489bf..f4c3c84b090f 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -1318,8 +1318,11 @@ static int intel_pt_walk_tnt(struct intel_pt_decoder *decoder) return 0; } decoder->ip += intel_pt_insn.length; - if (!decoder->tnt.count) + if (!decoder->tnt.count) { + decoder->sample_timestamp = decoder->timestamp; + decoder->sample_insn_cnt = decoder->timestamp_insn_cnt; return -EAGAIN; + } decoder->tnt.payload <<= 1; continue; }
[tip:perf/core] perf intel-pt: Fix improved sample timestamp
Commit-ID: 61b6e08dc8e3ea80b7485c9b3f875ddd45c8466b Gitweb: https://git.kernel.org/tip/61b6e08dc8e3ea80b7485c9b3f875ddd45c8466b Author: Adrian Hunter AuthorDate: Fri, 10 May 2019 15:41:42 +0300 Committer: Arnaldo Carvalho de Melo CommitDate: Thu, 16 May 2019 14:17:23 -0300 perf intel-pt: Fix improved sample timestamp The decoder uses its current timestamp in samples. Usually that is a timestamp that has already passed, but in some cases it is a timestamp for a branch that the decoder is walking towards, and consequently hasn't reached. The intel_pt_sample_time() function decides which is which, but was not handling TNT packets exactly correctly. In the case of TNT, the timestamp applies to the first branch, so the decoder must first walk to that branch. That means intel_pt_sample_time() should return true for TNT, and this patch makes that change. However, if the first branch is a non-taken branch (i.e. a 'N'), then intel_pt_sample_time() needs to return false for subsequent taken branches in the same TNT packet. To handle that, introduce a new state INTEL_PT_STATE_TNT_CONT to distinguish the cases. Note that commit 3f04d98e972b5 ("perf intel-pt: Improve sample timestamp") was also a stable fix and appears, for example, in v4.4 stable tree as commit a4ebb58fd124 ("perf intel-pt: Improve sample timestamp"). 
Signed-off-by: Adrian Hunter Cc: Jiri Olsa Cc: stable@vger.kernel.org # v4.4+ Fixes: 3f04d98e972b5 ("perf intel-pt: Improve sample timestamp") Link: http://lkml.kernel.org/r/20190510124143.27054-3-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/intel-pt-decoder/intel-pt-decoder.c | 13 ++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 26dbf11e071a..9cbd587489bf 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -58,6 +58,7 @@ enum intel_pt_pkt_state { INTEL_PT_STATE_NO_IP, INTEL_PT_STATE_ERR_RESYNC, INTEL_PT_STATE_IN_SYNC, + INTEL_PT_STATE_TNT_CONT, INTEL_PT_STATE_TNT, INTEL_PT_STATE_TIP, INTEL_PT_STATE_TIP_PGD, @@ -72,8 +73,9 @@ static inline bool intel_pt_sample_time(enum intel_pt_pkt_state pkt_state) case INTEL_PT_STATE_NO_IP: case INTEL_PT_STATE_ERR_RESYNC: case INTEL_PT_STATE_IN_SYNC: - case INTEL_PT_STATE_TNT: + case INTEL_PT_STATE_TNT_CONT: return true; + case INTEL_PT_STATE_TNT: case INTEL_PT_STATE_TIP: case INTEL_PT_STATE_TIP_PGD: case INTEL_PT_STATE_FUP: @@ -1261,7 +1263,9 @@ static int intel_pt_walk_tnt(struct intel_pt_decoder *decoder) return -ENOENT; } decoder->tnt.count -= 1; - if (!decoder->tnt.count) + if (decoder->tnt.count) + decoder->pkt_state = INTEL_PT_STATE_TNT_CONT; + else decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; decoder->tnt.payload <<= 1; decoder->state.from_ip = decoder->ip; @@ -1292,7 +1296,9 @@ static int intel_pt_walk_tnt(struct intel_pt_decoder *decoder) if (intel_pt_insn.branch == INTEL_PT_BR_CONDITIONAL) { decoder->tnt.count -= 1; - if (!decoder->tnt.count) + if (decoder->tnt.count) + decoder->pkt_state = INTEL_PT_STATE_TNT_CONT; + else decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; if (decoder->tnt.payload & BIT63) { decoder->tnt.payload <<= 1; @@ -2372,6 +2378,7 @@ const struct intel_pt_state 
*intel_pt_decode(struct intel_pt_decoder *decoder) err = intel_pt_walk_trace(decoder); break; case INTEL_PT_STATE_TNT: + case INTEL_PT_STATE_TNT_CONT: err = intel_pt_walk_tnt(decoder); if (err == -EAGAIN) err = intel_pt_walk_trace(decoder);