> Em Fri, Oct 13, 2017 at 07:09:26AM -0700, kan.li...@intel.com escreveu: > > From: Kan Liang <kan.li...@intel.com> > > > > The process function process_synthesized_event writes the process > > result to perf.data, which is not multithreading friendly. > > > > Realloc buffer for each thread to temporarily keep the processing > > result. Write them to the perf.data at the end of event synthesization. > > The new method doesn't impact the final result, because > > - The order of the synthesized event is not important. > > - The number of synthesized event is limited. Usually, it only needs > > hundreds of Kilobyte to store all the synthesized event. > > It's unlikly failed because of lack of memory. > > Why not write to a per cpu file and then at the end merge them?
I just thought merging from memory should be faster than merging from files. > Leave > the memory management to the kernel, i.e. in most cases you may even not > end up touching the disk, when memory is plentiful, just rewind the per > event files and go on dumping to the main perf.data file. > Agree. I will do it. > At some point we may just don't do this merging, and keep per cpu files > all the way to perf report, etc. This would be a first foray into > that... > Yes, but it's a little bit complex. I think I will do it in a separate improvement patch series later. For now, I will do the file merge. Thanks, Kan > - Arnaldo > > > The threads number hard code to online CPU number. The following patch > > will introduce an option to set it. > > > > The multithreading synthesize is only available for per cpu monitoring. > > > > Signed-off-by: Kan Liang <kan.li...@intel.com> > > --- > > tools/perf/builtin-record.c | 86 > ++++++++++++++++++++++++++++++++++++++++++--- > > 1 file changed, 81 insertions(+), 5 deletions(-) > > > > diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c > > index 4ede9bf..1978021 100644 > > --- a/tools/perf/builtin-record.c > > +++ b/tools/perf/builtin-record.c > > @@ -62,6 +62,11 @@ struct switch_output { > > bool set; > > }; > > > > +struct synthesize_buf { > > + void *entry; > > + size_t size; > > +}; > > + > > struct record { > > struct perf_tool tool; > > struct record_opts opts; > > @@ -80,6 +85,7 @@ struct record { > > bool timestamp_filename; > > struct switch_output switch_output; > > unsigned long long samples; > > + struct synthesize_buf *buf; > > }; > > > > static volatile int auxtrace_record__snapshot_started; > > @@ -120,14 +126,35 @@ static int record__write(struct record *rec, void > *bf, size_t size) > > return 0; > > } > > > > +static int buf__write(struct record *rec, int idx, void *bf, size_t size) > > +{ > > + size_t target_size = rec->buf[idx].size; > > + void *target; > > + > > + target = realloc(rec->buf[idx].entry, target_size + size); > > + if (target == NULL) > > + return -ENOMEM; > > + > > + memcpy(target + target_size, bf, size); > > + > > + rec->buf[idx].entry = target; > > + rec->buf[idx].size += size; > > + > > + return 0; > > +} > > + > > static int process_synthesized_event(struct perf_tool *tool, > > union perf_event *event, > > struct perf_sample *sample > __maybe_unused, > > struct machine *machine > __maybe_unused, > > - struct thread_info *thread > __maybe_unused) > > + struct thread_info *thread) > > { > > struct record *rec = container_of(tool, struct record, tool); > > - return record__write(rec, event, event->header.size); > > + > > + if (!perf_singlethreaded && thread) > > + return buf__write(rec, thread->idx, event, event- > >header.size); > > + else > > + return record__write(rec, event, event->header.size); > > } > > > > static int record__pushfn(void *to, void *bf, size_t size) > > @@ -690,6 +717,48 @@ static const struct perf_event_mmap_page > *record__pick_pc(struct record *rec) > > return NULL; > > } > > > > +static int record__multithread_synthesize(struct record *rec, > > + struct machine *machine, > > + struct perf_tool *tool, > > + struct record_opts *opts) > > +{ > > + int i, err, nr_thread = sysconf(_SC_NPROCESSORS_ONLN); > > + > > + if (nr_thread > 1) { > > + perf_set_multithreaded(); > > + > > + rec->buf = calloc(nr_thread, sizeof(struct synthesize_buf)); > > + if (rec->buf == NULL) { > > + pr_err("Could not do multithread synthesize\n"); > > + nr_thread = 1; > > + perf_set_singlethreaded(); > > + } > > + } > > + > > + err = __machine__synthesize_threads(machine, tool, &opts->target, > > + rec->evlist->threads, > > + process_synthesized_event, > > + opts->sample_address, > > + opts->proc_map_timeout, > nr_thread); > > + if (err < 0) > > + goto free; > > + > > + if (nr_thread > 1) { > > + for (i = 0; i < nr_thread; i++) > > + record__write(rec, rec->buf[i].entry, rec->buf[i].size); > > + } > > + > > +free: > > + if (nr_thread > 1) { > > + for (i = 0; i < nr_thread; i++) > > + free(rec->buf[i].entry); > > + free(rec->buf); > > + perf_set_singlethreaded(); > > + } > > + > > + return err; > > +} > > + > > static int record__synthesize(struct record *rec, bool tail) > > { > > struct perf_session *session = rec->session; > > @@ -766,9 +835,16 @@ static int record__synthesize(struct record *rec, > bool tail) > > perf_event__synthesize_guest_os, > tool); > > } > > > > - err = __machine__synthesize_threads(machine, tool, &opts->target, > rec->evlist->threads, > > - process_synthesized_event, opts- > >sample_address, > > - opts->proc_map_timeout, 1); > > + /* multithreading synthesize is only available for cpu monitoring */ > > + if (target__has_cpu(&opts->target)) > > + err = record__multithread_synthesize(rec, machine, tool, > opts); > > + else > > + err = __machine__synthesize_threads(machine, tool, > > + &opts->target, > > + rec->evlist->threads, > > + process_synthesized_event, > > + opts->sample_address, > > + opts->proc_map_timeout, > 1); > > out: > > return err; > > } > > -- > > 2.7.4