On Fri, Aug 30, 2019 at 02:24:20PM +0800, Leo Yan wrote:
> Firstly, this patch adds support for the thread stack; whenever a
> branch packet comes in, we push to or pop from the stack based on the
> sample flags.
>
> Secondly, based on the thread stack we can synthesize a call chain for
> each instruction sample; this can be used via the itrace option
> '--itrace=g'.
>
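For reviewers less familiar with the thread stack machinery: the push/pop
decision is driven entirely by the branch flags the decoder attaches to
each packet.  Roughly - and this is me paraphrasing from memory of
tools/perf/util/thread-stack.c, so treat it as a sketch rather than the
actual code:

	/* Inside thread_stack__event(), approximately: */
	if (flags & PERF_IP_FLAG_CALL) {
		/*
		 * A call pushes its return address, i.e. the address of
		 * the instruction right after the call - which is why
		 * the caller must supply insn_len.
		 */
		thread_stack__push(ts, from_ip + insn_len,
				   flags & PERF_IP_FLAG_TRACE_END);
	} else if (flags & PERF_IP_FLAG_RETURN) {
		/*
		 * A return pops entries until to_ip matches a saved
		 * return address.
		 */
		thread_stack__pop(ts, to_ip);
	}

That is also why cs_etm__add_stack_event() below has to work out the
instruction size: with a wrong insn_len the pushed return address would
never match on the way back up.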
In most cases using the word "secondly" is a good indication the patch
should be split.

> Signed-off-by: Leo Yan <leo....@linaro.org>
> ---
>  tools/perf/util/cs-etm.c | 74 +++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 73 insertions(+), 1 deletion(-)
>
> diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
> index 882a0718033d..ad573d3bd305 100644
> --- a/tools/perf/util/cs-etm.c
> +++ b/tools/perf/util/cs-etm.c
> @@ -17,6 +17,7 @@
>  #include <stdlib.h>
>
>  #include "auxtrace.h"
> +#include "callchain.h"
>  #include "color.h"
>  #include "cs-etm.h"
>  #include "cs-etm-decoder/cs-etm-decoder.h"
> @@ -69,6 +70,7 @@ struct cs_etm_traceid_queue {
>  	size_t last_branch_pos;
>  	union perf_event *event_buf;
>  	struct thread *thread;
> +	struct ip_callchain *chain;
>  	struct branch_stack *last_branch;
>  	struct branch_stack *last_branch_rb;
>  	struct cs_etm_packet *prev_packet;
> @@ -246,6 +248,16 @@ static int cs_etm__init_traceid_queue(struct cs_etm_queue *etmq,
>  	if (!tidq->prev_packet)
>  		goto out_free;
>
> +	if (etm->synth_opts.callchain) {
> +		size_t sz = sizeof(struct ip_callchain);
> +
> +		/* Add 1 to callchain_sz for callchain context */
> +		sz += (etm->synth_opts.callchain_sz + 1) * sizeof(u64);
> +		tidq->chain = zalloc(sz);
> +		if (!tidq->chain)
> +			goto out_free;
> +	}
> +
>  	if (etm->synth_opts.last_branch) {
>  		size_t sz = sizeof(struct branch_stack);
>
> @@ -270,6 +282,7 @@ static int cs_etm__init_traceid_queue(struct cs_etm_queue *etmq,
>  	zfree(&tidq->last_branch);
>  	zfree(&tidq->prev_packet);
>  	zfree(&tidq->packet);
> +	zfree(&tidq->chain);
>  out:
>  	return rc;
>  }
> @@ -541,6 +554,7 @@ static void cs_etm__free_traceid_queues(struct cs_etm_queue *etmq)
>  		zfree(&tidq->last_branch_rb);
>  		zfree(&tidq->prev_packet);
>  		zfree(&tidq->packet);
> +		zfree(&tidq->chain);
>  		zfree(&tidq);
>
>  		/*
> @@ -1121,6 +1135,41 @@ static void cs_etm__copy_insn(struct cs_etm_queue *etmq,
>  			   sample->insn_len, (void *)sample->insn);
>  }
>
> +static void cs_etm__add_stack_event(struct cs_etm_queue *etmq,
> +				    struct cs_etm_traceid_queue *tidq)
> +{
> +	struct cs_etm_auxtrace *etm = etmq->etm;
> +	u8 trace_chan_id = tidq->trace_chan_id;
> +	int insn_len;
> +	u64 from_ip, to_ip;
> +
> +	if (etm->synth_opts.callchain || etm->synth_opts.thread_stack) {
> +
> +		from_ip = cs_etm__last_executed_instr(tidq->prev_packet);
> +		to_ip = cs_etm__first_executed_instr(tidq->packet);
> +
> +		/*
> +		 * T32 instruction size might be 32-bit or 16-bit, decide by
> +		 * calling cs_etm__t32_instr_size().
> +		 */
> +		if (tidq->prev_packet->isa == CS_ETM_ISA_T32)
> +			insn_len = cs_etm__t32_instr_size(etmq, trace_chan_id,
> +							  from_ip);
> +		/* Otherwise, A64 and A32 instruction size are always 32-bit. */
> +		else
> +			insn_len = 4;
> +
> +		thread_stack__event(tidq->thread, tidq->prev_packet->cpu,
> +				    tidq->prev_packet->flags,
> +				    from_ip, to_ip, insn_len,
> +				    etmq->buffer->buffer_nr);
> +	} else {
> +		thread_stack__set_trace_nr(tidq->thread,
> +					   tidq->prev_packet->cpu,
> +					   etmq->buffer->buffer_nr);

Please add a comment on what the above does.  As a rule of thumb I add a
comment per addition in a patch in order to help people understand what is
happening and some of the reasoning behind the code.
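Something along these lines would do; note this is only my own reading of
what thread_stack__set_trace_nr() is for, so correct it if I'm off:

+	} else {
+		/*
+		 * Not feeding the thread stack here, but keep its trace
+		 * buffer number in sync: if a stack exists for this
+		 * thread/CPU it gets flushed when the buffer number
+		 * changes, since entries from a previous buffer no
+		 * longer apply.
+		 */
+		thread_stack__set_trace_nr(tidq->thread,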
> +	}
> +}
> +
>  static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
>  					    struct cs_etm_traceid_queue *tidq,
>  					    u64 addr, u64 period)
> @@ -1146,6 +1195,14 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
>
>  	cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->packet, &sample);
>
> +	if (etm->synth_opts.callchain) {
> +		thread_stack__sample(tidq->thread, tidq->packet->cpu,
> +				     tidq->chain,
> +				     etm->synth_opts.callchain_sz + 1,
> +				     sample.ip, etm->kernel_start);
> +		sample.callchain = tidq->chain;
> +	}
> +
>  	if (etm->synth_opts.last_branch) {
>  		cs_etm__copy_last_branch_rb(etmq, tidq);
>  		sample.branch_stack = tidq->last_branch;
> @@ -1329,6 +1386,8 @@ static int cs_etm__synth_events(struct cs_etm_auxtrace *etm,
>  		attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR;
>  	}
>
> +	if (etm->synth_opts.callchain)
> +		attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
>  	if (etm->synth_opts.last_branch)
>  		attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
>
> @@ -1397,6 +1456,9 @@ static int cs_etm__sample(struct cs_etm_queue *etmq,
>  		tidq->period_instructions = instrs_over;
>  	}
>
> +	if (tidq->prev_packet->last_instr_taken_branch)
> +		cs_etm__add_stack_event(etmq, tidq);
> +
>  	if (etm->sample_branches) {
>  		bool generate_sample = false;
>
> @@ -2596,7 +2658,17 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
>  	} else {
>  		itrace_synth_opts__set_default(&etm->synth_opts,
>  				session->itrace_synth_opts->default_no_sample);
> -		etm->synth_opts.callchain = false;
> +
> +		etm->synth_opts.thread_stack =
> +				session->itrace_synth_opts->thread_stack;
> +	}
> +
> +	if (etm->synth_opts.callchain && !symbol_conf.use_callchain) {
> +		symbol_conf.use_callchain = true;
> +		if (callchain_register_param(&callchain_param) < 0) {
> +			symbol_conf.use_callchain = false;
> +			etm->synth_opts.callchain = false;
> +		}
>  	}
>
>  	err = cs_etm__synth_events(etm, session);
> --
> 2.17.1
>
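One more nit, on the "Add 1 to callchain_sz for callchain context"
comment: it might be worth spelling out that thread_stack__sample()
reserves the first slot of the chain for a context marker ahead of the
return addresses, which is where the extra entry goes.  As an
illustration only - made-up values, hypothetical variable names, and my
own reading of thread-stack.c:

	/* A user space sample with one caller on the stack: */
	chain->nr     = 3;
	chain->ips[0] = PERF_CONTEXT_USER;	/* the "+ 1" slot */
	chain->ips[1] = sample_ip;		/* the leaf, i.e. sample.ip */
	chain->ips[2] = caller_ret_addr;	/* from the thread stack */

A sentence to that effect next to the allocation would save the next
reader a trip into thread-stack.c.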