Implemented the -f/--mmap-flush option that specifies a threshold used to
postpone and/or trigger the move of data from mmaped kernel buffers to storage.

The option can be used to avoid writing every single byte of data into the
stored trace as soon as it becomes available: flushing is postponed until at
least the specified number of bytes has accumulated in a buffer. The default
option value is 1 byte.

  $ tools/perf/perf record -f 1024 -e cycles -- matrix.gcc
  $ tools/perf/perf record --aio -f 1024 -e cycles -- matrix.gcc
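
The applied threshold can be checked through the debug message added by this
patch (a sketch, not captured output; the pr_debug message assumes running
with -v):

  $ tools/perf/perf record -v -f 1024 -e cycles -- matrix.gcc
  ...
  mmap flush: 1024
  ...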

The implemented 'sync' parameter provides a means to force the data move
independently of the threshold value. Even though the user supplies a flush
value on the command line, the tool still needs the capability to drain the
memory buffers, at least at the end of the collection.
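
For illustration, a condensed sketch of that drain pattern as implemented in
record__mmap_read_evlist() below (identifiers taken from the diff; error
handling trimmed):

  if (sync) {                               /* final drain pass */
          flush = map->flush;               /* save the user threshold */
          map->flush = MMAP_FLUSH_DEFAULT;  /* flush every remaining byte */
  }
  /* push whatever is left in the buffer to the trace file */
  perf_mmap__push(map, rec, record__pushfn);
  if (sync)
          map->flush = flush;               /* restore the threshold */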

Signed-off-by: Alexey Budankov <alexey.budan...@linux.intel.com>
---
 tools/perf/Documentation/perf-record.txt |  5 +++
 tools/perf/builtin-record.c              | 53 +++++++++++++++++++++---
 tools/perf/perf.h                        |  1 +
 tools/perf/util/evlist.c                 |  6 +--
 tools/perf/util/evlist.h                 |  3 +-
 tools/perf/util/mmap.c                   |  4 +-
 tools/perf/util/mmap.h                   |  3 +-
 7 files changed, 63 insertions(+), 12 deletions(-)

diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 8f0c2be34848..8276d6517812 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -459,6 +459,11 @@ Set affinity mask of trace reading thread according to the policy defined by 'mo
  node - thread affinity mask is set to NUMA node cpu mask of the processed mmap buffer
   cpu  - thread affinity mask is set to cpu of the processed mmap buffer
 
+-f::
+--mmap-flush=n::
+Minimal number of bytes accumulated in the mmaped kernel buffer that is flushed to storage (default: 1).
+The maximal allowed value is a quarter of the mmaped kernel buffer size.
+
 --all-kernel::
 Configure all used events to run in kernel space.
 
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 6c3719ac901d..6235cc6b59e9 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -334,6 +334,29 @@ static int record__aio_enabled(struct record *rec)
        return rec->opts.nr_cblocks > 0;
 }
 
+#define MMAP_FLUSH_DEFAULT 1
+static int record__mmap_flush_parse(const struct option *opt,
+                                   const char *str,
+                                   int unset)
+{
+       int mmap_len;
+       struct record_opts *opts = (struct record_opts *)opt->value;
+
+       if (unset)
+               return 0;
+
+       if (str)
+               opts->mmap_flush = strtol(str, NULL, 0);
+       if (opts->mmap_flush < 1)
+               opts->mmap_flush = MMAP_FLUSH_DEFAULT;
+
+       mmap_len = perf_evlist__mmap_size(opts->mmap_pages);
+       if (opts->mmap_flush > mmap_len / 4)
+               opts->mmap_flush = mmap_len / 4;
+
+       return 0;
+}
+
 static int process_synthesized_event(struct perf_tool *tool,
                                     union perf_event *event,
                                     struct perf_sample *sample __maybe_unused,
@@ -543,7 +566,8 @@ static int record__mmap_evlist(struct record *rec,
        if (perf_evlist__mmap_ex(evlist, opts->mmap_pages,
                                 opts->auxtrace_mmap_pages,
                                 opts->auxtrace_snapshot_mode,
-                                opts->nr_cblocks, opts->affinity) < 0) {
+                                opts->nr_cblocks, opts->affinity,
+                                opts->mmap_flush) < 0) {
                if (errno == EPERM) {
                        pr_err("Permission error mapping pages.\n"
                               "Consider increasing "
@@ -734,7 +758,7 @@ static void record__adjust_affinity(struct record *rec, struct perf_mmap *map)
 }
 
 static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
-                                   bool overwrite)
+                                   bool overwrite, bool sync)
 {
        u64 bytes_written = rec->bytes_written;
        int i;
@@ -757,12 +781,19 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evli
                off = record__aio_get_pos(trace_fd);
 
        for (i = 0; i < evlist->nr_mmaps; i++) {
+               u64 flush = MMAP_FLUSH_DEFAULT;
                struct perf_mmap *map = &maps[i];
 
                if (map->base) {
                        record__adjust_affinity(rec, map);
+                       if (sync) {
+                               flush = map->flush;
+                               map->flush = MMAP_FLUSH_DEFAULT;
+                       }
                        if (!record__aio_enabled(rec)) {
                                if (perf_mmap__push(map, rec, record__pushfn) != 0) {
+                                       if (sync)
+                                               map->flush = flush;
                                        rc = -1;
                                        goto out;
                                }
@@ -775,10 +806,14 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evli
                                idx = record__aio_sync(map, false);
                                if (perf_mmap__aio_push(map, rec, idx, record__aio_pushfn, &off) != 0) {
                                        record__aio_set_pos(trace_fd, off);
+                                       if (sync)
+                                               map->flush = flush;
                                        rc = -1;
                                        goto out;
                                }
                        }
+                       if (sync)
+                               map->flush = flush;
                }
 
                if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
@@ -804,15 +839,15 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evli
        return rc;
 }
 
-static int record__mmap_read_all(struct record *rec)
+static int record__mmap_read_all(struct record *rec, bool sync)
 {
        int err;
 
-       err = record__mmap_read_evlist(rec, rec->evlist, false);
+       err = record__mmap_read_evlist(rec, rec->evlist, false, sync);
        if (err)
                return err;
 
-       return record__mmap_read_evlist(rec, rec->evlist, true);
+       return record__mmap_read_evlist(rec, rec->evlist, true, sync);
 }
 
 static void record__init_features(struct record *rec)
@@ -1311,7 +1346,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
                if (trigger_is_hit(&switch_output_trigger) || done || draining)
                        perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);
 
-               if (record__mmap_read_all(rec) < 0) {
+               if (record__mmap_read_all(rec, false) < 0) {
                        trigger_error(&auxtrace_snapshot_trigger);
                        trigger_error(&switch_output_trigger);
                        err = -1;
@@ -1412,6 +1447,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
                record__synthesize_workload(rec, true);
 
 out_child:
+       record__mmap_read_all(rec, true);
        record__aio_mmap_read_sync(rec);
 
        if (forks) {
@@ -1814,6 +1850,7 @@ static struct record record = {
                        .uses_mmap   = true,
                        .default_per_cpu = true,
                },
+               .mmap_flush          = MMAP_FLUSH_DEFAULT,
        },
        .tool = {
                .sample         = process_sample_event,
@@ -1880,6 +1917,9 @@ static struct option __record_options[] = {
        OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
                     "number of mmap data pages and AUX area tracing mmap 
pages",
                     record__parse_mmap_pages),
+       OPT_CALLBACK('f', "mmap-flush", &record.opts, "bytes",
+                    "Minimal number of bytes in mmap data pages that is 
written to a storage (default: 1)",
+                    record__mmap_flush_parse),
        OPT_BOOLEAN(0, "group", &record.opts.group,
                    "put the counters into a counter group"),
        OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
@@ -2183,6 +2223,7 @@ int cmd_record(int argc, const char **argv)
                pr_info("nr_cblocks: %d\n", rec->opts.nr_cblocks);
 
        pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
+       pr_debug("mmap flush: %d\n", rec->opts.mmap_flush);
 
        err = __cmd_record(&record, argc, argv);
 out:
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index b120e547ddc7..7886cc9771cf 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -85,6 +85,7 @@ struct record_opts {
        u64          clockid_res_ns;
        int          nr_cblocks;
        int          affinity;
+       int          mmap_flush;
 };
 
 enum perf_affinity {
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 08cedb643ea6..937039faac59 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -1022,7 +1022,7 @@ int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
  */
 int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
                         unsigned int auxtrace_pages,
-                        bool auxtrace_overwrite, int nr_cblocks, int affinity)
+                        bool auxtrace_overwrite, int nr_cblocks, int affinity, int flush)
 {
        struct perf_evsel *evsel;
        const struct cpu_map *cpus = evlist->cpus;
@@ -1032,7 +1032,7 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
         * Its value is decided by evsel's write_backward.
         * So &mp should not be passed through const pointer.
         */
-       struct mmap_params mp = { .nr_cblocks = nr_cblocks, .affinity = affinity };
+       struct mmap_params mp = { .nr_cblocks = nr_cblocks, .affinity = affinity, .flush = flush };
 
        if (!evlist->mmap)
                evlist->mmap = perf_evlist__alloc_mmap(evlist, false);
@@ -1064,7 +1064,7 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
 
 int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages)
 {
-       return perf_evlist__mmap_ex(evlist, pages, 0, false, 0, PERF_AFFINITY_SYS);
+       return perf_evlist__mmap_ex(evlist, pages, 0, false, 0, PERF_AFFINITY_SYS, 1);
 }
 
 int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target)
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 744906dd4887..edf18811e39f 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -165,7 +165,8 @@ unsigned long perf_event_mlock_kb_in_pages(void);
 
 int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
                         unsigned int auxtrace_pages,
-                        bool auxtrace_overwrite, int nr_cblocks, int affinity);
+                        bool auxtrace_overwrite, int nr_cblocks,
+                        int affinity, int flush);
 int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages);
 void perf_evlist__munmap(struct perf_evlist *evlist);
 
diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index cdc7740fc181..ef3d79b2c90b 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -440,6 +440,8 @@ int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd, int c
 
        perf_mmap__setup_affinity_mask(map, mp);
 
+       map->flush = mp->flush;
+
        if (auxtrace_mmap__mmap(&map->auxtrace_mmap,
                                &mp->auxtrace_mp, map->base, fd))
                return -1;
@@ -492,7 +494,7 @@ static int __perf_mmap__read_init(struct perf_mmap *md)
        md->start = md->overwrite ? head : old;
        md->end = md->overwrite ? old : head;
 
-       if (md->start == md->end)
+       if ((md->end - md->start) < md->flush)
                return -EAGAIN;
 
        size = md->end - md->start;
diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h
index e566c19b242b..b82f8c2d55c4 100644
--- a/tools/perf/util/mmap.h
+++ b/tools/perf/util/mmap.h
@@ -39,6 +39,7 @@ struct perf_mmap {
        } aio;
 #endif
        cpu_set_t       affinity_mask;
+       u64             flush;
 };
 
 /*
@@ -70,7 +71,7 @@ enum bkw_mmap_state {
 };
 
 struct mmap_params {
-       int                         prot, mask, nr_cblocks, affinity;
+       int                         prot, mask, nr_cblocks, affinity, flush;
        struct auxtrace_mmap_params auxtrace_mp;
 };
