Implemented the -f,--mmap-flush option, which specifies a threshold that postpones and/or triggers the move of data from the mmapped kernel buffers to storage.
The option can be used to avoid capturing every single byte of data into the stored trace. The default option value is 1.

  $ tools/perf/perf record -f 1024 -e cycles -- matrix.gcc
  $ tools/perf/perf record --aio -f 1024 -e cycles -- matrix.gcc

The implemented sync parameter is the means to force the data move independently of the threshold value. Even though a user provides a flush value on the command line, the tool still needs the capability to drain the memory buffers, at least at the end of the collection.

Signed-off-by: Alexey Budankov <alexey.budan...@linux.intel.com>
---
 tools/perf/Documentation/perf-record.txt |  5 +++
 tools/perf/builtin-record.c              | 53 +++++++++++++++++++++---
 tools/perf/perf.h                        |  1 +
 tools/perf/util/evlist.c                 |  6 +--
 tools/perf/util/evlist.h                 |  3 +-
 tools/perf/util/mmap.c                   |  4 +-
 tools/perf/util/mmap.h                   |  3 +-
 7 files changed, 63 insertions(+), 12 deletions(-)

diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 8f0c2be34848..8276d6517812 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -459,6 +459,11 @@ Set affinity mask of trace reading thread according to the policy defined by 'mode':
   node - thread affinity mask is set to NUMA node cpu mask of the processed mmap buffer
   cpu  - thread affinity mask is set to cpu of the processed mmap buffer
 
+-f::
+--mmap-flush=n::
+Minimal number of bytes accumulated in the mmapped kernel buffer before it is flushed to storage (default: 1).
+The maximal allowed value is a quarter of the mmapped kernel buffer size.
+
 --all-kernel::
 Configure all used events to run in kernel space.
 
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 6c3719ac901d..6235cc6b59e9 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -334,6 +334,29 @@ static int record__aio_enabled(struct record *rec)
 	return rec->opts.nr_cblocks > 0;
 }
 
+#define MMAP_FLUSH_DEFAULT 1
+static int record__mmap_flush_parse(const struct option *opt,
+				    const char *str,
+				    int unset)
+{
+	int mmap_len;
+	struct record_opts *opts = (struct record_opts *)opt->value;
+
+	if (unset)
+		return 0;
+
+	if (str)
+		opts->mmap_flush = strtol(str, NULL, 0);
+	if (!opts->mmap_flush)
+		opts->mmap_flush = MMAP_FLUSH_DEFAULT;
+
+	mmap_len = perf_evlist__mmap_size(opts->mmap_pages);
+	if (opts->mmap_flush > mmap_len / 4)
+		opts->mmap_flush = mmap_len / 4;
+
+	return 0;
+}
+
 static int process_synthesized_event(struct perf_tool *tool,
 				     union perf_event *event,
 				     struct perf_sample *sample __maybe_unused,
@@ -543,7 +566,8 @@ static int record__mmap_evlist(struct record *rec,
 	if (perf_evlist__mmap_ex(evlist, opts->mmap_pages,
 				 opts->auxtrace_mmap_pages,
 				 opts->auxtrace_snapshot_mode,
-				 opts->nr_cblocks, opts->affinity) < 0) {
+				 opts->nr_cblocks, opts->affinity,
+				 opts->mmap_flush) < 0) {
 		if (errno == EPERM) {
 			pr_err("Permission error mapping pages.\n"
 			       "Consider increasing "
@@ -734,7 +758,7 @@ static void record__adjust_affinity(struct record *rec, struct perf_mmap *map)
 }
 
 static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
-				    bool overwrite)
+				    bool overwrite, bool sync)
 {
 	u64 bytes_written = rec->bytes_written;
 	int i;
@@ -757,12 +781,19 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
 		off = record__aio_get_pos(trace_fd);
 
 	for (i = 0; i < evlist->nr_mmaps; i++) {
+		u64 flush = MMAP_FLUSH_DEFAULT;
 		struct perf_mmap *map = &maps[i];
 
 		if (map->base) {
 			record__adjust_affinity(rec, map);
+			if (sync) {
+				flush = map->flush;
+				map->flush = MMAP_FLUSH_DEFAULT;
+			}
 			if (!record__aio_enabled(rec)) {
 				if (perf_mmap__push(map, rec, record__pushfn) != 0) {
+					if (sync)
+						map->flush = flush;
 					rc = -1;
 					goto out;
 				}
@@ -775,10 +806,14 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
 				idx = record__aio_sync(map, false);
 				if (perf_mmap__aio_push(map, rec, idx, record__aio_pushfn, &off) != 0) {
 					record__aio_set_pos(trace_fd, off);
+					if (sync)
+						map->flush = flush;
 					rc = -1;
 					goto out;
 				}
 			}
+			if (sync)
+				map->flush = flush;
 		}
 
 		if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
@@ -804,15 +839,15 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
 	return rc;
 }
 
-static int record__mmap_read_all(struct record *rec)
+static int record__mmap_read_all(struct record *rec, bool sync)
 {
 	int err;
 
-	err = record__mmap_read_evlist(rec, rec->evlist, false);
+	err = record__mmap_read_evlist(rec, rec->evlist, false, sync);
 	if (err)
 		return err;
 
-	return record__mmap_read_evlist(rec, rec->evlist, true);
+	return record__mmap_read_evlist(rec, rec->evlist, true, sync);
 }
 
 static void record__init_features(struct record *rec)
@@ -1311,7 +1346,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 		if (trigger_is_hit(&switch_output_trigger) || done || draining)
 			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);
 
-		if (record__mmap_read_all(rec) < 0) {
+		if (record__mmap_read_all(rec, false) < 0) {
 			trigger_error(&auxtrace_snapshot_trigger);
 			trigger_error(&switch_output_trigger);
 			err = -1;
@@ -1412,6 +1447,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 		record__synthesize_workload(rec, true);
 
 out_child:
+	record__mmap_read_all(rec, true);
 	record__aio_mmap_read_sync(rec);
 
 	if (forks) {
@@ -1814,6 +1850,7 @@ static struct record record = {
 			.uses_mmap   = true,
 			.default_per_cpu = true,
 		},
+		.mmap_flush	= MMAP_FLUSH_DEFAULT,
 	},
 	.tool = {
 		.sample		= process_sample_event,
@@ -1880,6 +1917,9 @@ static struct option __record_options[] = {
 	OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
 		     "number of mmap data pages and AUX area tracing mmap pages",
 		     record__parse_mmap_pages),
+	OPT_CALLBACK('f', "mmap-flush", &record.opts, "bytes",
+		     "Minimal number of bytes in mmap data pages that is written to storage (default: 1)",
+		     record__mmap_flush_parse),
 	OPT_BOOLEAN(0, "group", &record.opts.group,
 		    "put the counters into a counter group"),
 	OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
@@ -2183,6 +2223,7 @@ int cmd_record(int argc, const char **argv)
 		pr_info("nr_cblocks: %d\n", rec->opts.nr_cblocks);
 
 	pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
+	pr_debug("mmap flush: %d\n", rec->opts.mmap_flush);
 
 	err = __cmd_record(&record, argc, argv);
 out:
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index b120e547ddc7..7886cc9771cf 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -85,6 +85,7 @@ struct record_opts {
 	u64	clockid_res_ns;
 	int	nr_cblocks;
 	int	affinity;
+	int	mmap_flush;
 };
 
 enum perf_affinity {
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 08cedb643ea6..937039faac59 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -1022,7 +1022,7 @@ int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
  */
 int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
 			 unsigned int auxtrace_pages,
-			 bool auxtrace_overwrite, int nr_cblocks, int affinity)
+			 bool auxtrace_overwrite, int nr_cblocks, int affinity, int flush)
 {
 	struct perf_evsel *evsel;
 	const struct cpu_map *cpus = evlist->cpus;
@@ -1032,7 +1032,7 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
 	 * Its value is decided by evsel's write_backward.
 	 * So &mp should not be passed through const pointer.
 	 */
-	struct mmap_params mp = { .nr_cblocks = nr_cblocks, .affinity = affinity };
+	struct mmap_params mp = { .nr_cblocks = nr_cblocks, .affinity = affinity, .flush = flush };
 
 	if (!evlist->mmap)
 		evlist->mmap = perf_evlist__alloc_mmap(evlist, false);
@@ -1064,7 +1064,7 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
 
 int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages)
 {
-	return perf_evlist__mmap_ex(evlist, pages, 0, false, 0, PERF_AFFINITY_SYS);
+	return perf_evlist__mmap_ex(evlist, pages, 0, false, 0, PERF_AFFINITY_SYS, 1);
 }
 
 int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target)
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 744906dd4887..edf18811e39f 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -165,7 +165,8 @@ unsigned long perf_event_mlock_kb_in_pages(void);
 
 int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
 			 unsigned int auxtrace_pages,
-			 bool auxtrace_overwrite, int nr_cblocks, int affinity);
+			 bool auxtrace_overwrite, int nr_cblocks,
+			 int affinity, int flush);
 int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages);
 
 void perf_evlist__munmap(struct perf_evlist *evlist);
diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index cdc7740fc181..ef3d79b2c90b 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -440,6 +440,8 @@ int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd, int cpu)
 
 	perf_mmap__setup_affinity_mask(map, mp);
 
+	map->flush = mp->flush;
+
 	if (auxtrace_mmap__mmap(&map->auxtrace_mmap,
 				&mp->auxtrace_mp, map->base, fd))
 		return -1;
@@ -492,7 +494,7 @@ static int __perf_mmap__read_init(struct perf_mmap *md)
 	md->start = md->overwrite ? head : old;
 	md->end = md->overwrite ? old : head;
 
-	if (md->start == md->end)
+	if ((md->end - md->start) < md->flush)
 		return -EAGAIN;
 
 	size = md->end - md->start;
diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h
index e566c19b242b..b82f8c2d55c4 100644
--- a/tools/perf/util/mmap.h
+++ b/tools/perf/util/mmap.h
@@ -39,6 +39,7 @@ struct perf_mmap {
 	} aio;
 #endif
 	cpu_set_t	affinity_mask;
+	u64		flush;
 };
 
 /*
@@ -70,7 +71,7 @@ enum bkw_mmap_state {
 };
 
 struct mmap_params {
-	int prot, mask, nr_cblocks, affinity;
+	int prot, mask, nr_cblocks, affinity, flush;
	struct auxtrace_mmap_params auxtrace_mp;
 };
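
For review purposes, below is a minimal standalone C sketch of the threshold semantics the patch wires into __perf_mmap__read_init() and record__mmap_read_evlist(): draining is postponed until at least 'flush' bytes are pending, and a sync pass temporarily lowers the threshold back to the 1-byte default so buffers are fully drained. All names in the sketch (struct ring, ring_drain(), read_pass()) are hypothetical stand-ins and not perf APIs.

```c
/*
 * Minimal sketch of the --mmap-flush semantics (hypothetical types,
 * not perf code): postpone draining a ring buffer until at least
 * 'flush' bytes are pending; a 'sync' pass forces a full drain by
 * temporarily lowering the threshold to the default of 1 byte.
 */
#include <stdio.h>

#define FLUSH_DEFAULT 1

struct ring {
	unsigned long head;  /* write position (kernel side)  */
	unsigned long tail;  /* read position (tool side)     */
	unsigned long flush; /* minimal bytes before draining */
};

/* Drain pending bytes if the threshold is met; returns bytes consumed. */
static unsigned long ring_drain(struct ring *r)
{
	unsigned long pending = r->head - r->tail;

	if (pending < r->flush)
		return 0; /* mimics the -EAGAIN path in __perf_mmap__read_init() */

	r->tail = r->head; /* stands in for writing pending bytes to storage */
	return pending;
}

/* One read pass over the buffer; 'sync' forces a final full drain. */
static unsigned long read_pass(struct ring *r, int sync)
{
	unsigned long saved = r->flush, n;

	if (sync)
		r->flush = FLUSH_DEFAULT;
	n = ring_drain(r);
	if (sync)
		r->flush = saved;
	return n;
}

int main(void)
{
	struct ring r = { .head = 512, .tail = 0, .flush = 1024 };

	printf("normal pass: %lu bytes\n", read_pass(&r, 0)); /* 0: below threshold */
	printf("sync pass:   %lu bytes\n", read_pass(&r, 1)); /* 512: forced drain  */
	return 0;
}
```

This also illustrates why the patch adds the record__mmap_read_all(rec, true) call at the out_child: label: without the forced sync pass, up to map->flush - 1 bytes per buffer would stay unflushed at the end of the collection.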