On Sun, Oct 14, 2018 at 11:47 PM Alexey Budankov
<alexey.budan...@linux.intel.com> wrote:
>
>
> Multi AIO trace writing allows caching more kernel data into userspace
> memory, postponing trace writing for the sake of an overall profiling data
> throughput increase. It could be seen as a kernel data buffer extension into
> userspace memory.
>
> With an aio option value different from 0 (default value is 1),
> the tool is capable of caching more and more data into user space
> along with delegating spill to AIO.
>
> That allows avoiding suspend at record__aio_sync() between calls of
> record__mmap_read_evlist() and increases profiling data throughput at
> the cost of userspace memory.
>
> Signed-off-by: Alexey Budankov <alexey.budan...@linux.intel.com>
> ---
> Changes in v14:
> - fix --aio option handling
> Changes in v13:
> - preserved --aio option name avoiding complication
> Changes in v12:
> - extended --aio option to --aio-cblocks=<n>
> Changes in v10:
> - added description of aio-cblocks option into perf-record.txt
> ---
>  tools/perf/Documentation/perf-record.txt |  4 +-
>  tools/perf/builtin-record.c              | 64 
> ++++++++++++++++++++++++--------
>  tools/perf/util/mmap.c                   | 64 
> +++++++++++++++++++++-----------
>  tools/perf/util/mmap.h                   |  9 +++--
>  4 files changed, 99 insertions(+), 42 deletions(-)
>
> diff --git a/tools/perf/Documentation/perf-record.txt 
> b/tools/perf/Documentation/perf-record.txt
> index 7efb4af88a68..d232b13ea713 100644
> --- a/tools/perf/Documentation/perf-record.txt
> +++ b/tools/perf/Documentation/perf-record.txt
> @@ -435,8 +435,8 @@ Specify vmlinux path which has debuginfo.
>  --buildid-all::
>  Record build-id of all DSOs regardless whether it's actually hit or not.
>
> ---aio::
> -Enable asynchronous (Posix AIO) trace writing mode.
> +--aio[=n]::
> +Use <n> control blocks in asynchronous (Posix AIO) trace writing mode 
> (default: 1, max: 4).
>  Asynchronous mode is supported only when linking Perf tool with libc library
>  providing implementation for Posix AIO API.
>
> diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
> index 0c6105860123..c5d9288b8670 100644
> --- a/tools/perf/builtin-record.c
> +++ b/tools/perf/builtin-record.c
> @@ -196,16 +196,35 @@ static int record__aio_complete(struct perf_mmap *md, 
> struct aiocb *cblock)
>         return rc;
>  }
>
> -static void record__aio_sync(struct perf_mmap *md)
> +static int record__aio_sync(struct perf_mmap *md, bool sync_all)
>  {
> -       struct aiocb *cblock = &md->aio.cblock;
> +       struct aiocb **aiocb = md->aio.aiocb;
> +       struct aiocb *cblocks = md->aio.cblocks;
>         struct timespec timeout = { 0, 1000 * 1000  * 1 }; /* 1ms */
> +       int i, do_suspend;
>
>         do {
> -               if (cblock->aio_fildes == -1 || record__aio_complete(md, 
> cblock))
> -                       return;
> +               do_suspend = 0;
> +               for (i = 0; i < md->aio.nr_cblocks; ++i) {
> +                       if (cblocks[i].aio_fildes == -1 || 
> record__aio_complete(md, &cblocks[i])) {
> +                               if (sync_all)
> +                                       aiocb[i] = NULL;
> +                               else
> +                                       return i;
> +                       } else {
> +                               /*
> +                                * Started aio write is not complete yet
> +                                * so it has to be waited before the
> +                                * next allocation.
> +                                */
> +                               aiocb[i] = &cblocks[i];
> +                               do_suspend = 1;
> +                       }
> +               }
> +               if (!do_suspend)
> +                       return -1;
>
> -               while (aio_suspend((const struct aiocb**)&cblock, 1, 
> &timeout)) {
> +               while (aio_suspend((const struct aiocb **)aiocb, 
> md->aio.nr_cblocks, &timeout)) {
>                         if (!(errno == EAGAIN || errno == EINTR))
>                                 pr_err("failed to sync perf data, error: 
> %m\n");
>                 }
> @@ -252,28 +271,33 @@ static void record__aio_mmap_read_sync(struct record 
> *rec)
>                 struct perf_mmap *map = &maps[i];
>
>                 if (map->base)
> -                       record__aio_sync(map);
> +                       record__aio_sync(map, true);
>         }
>  }
>
>  static int nr_cblocks_default = 1;
>
>  static int record__aio_parse(const struct option *opt,
> -                            const char *str __maybe_unused,
> +                            const char *str,
>                              int unset)
>  {
>         struct record_opts *opts = (struct record_opts *)opt->value;
>
> -       if (unset)
> +       if (unset) {
>                 opts->nr_cblocks = 0;
> -       else
> -               opts->nr_cblocks = nr_cblocks_default;
> +       } else {
> +               if (str)
> +                       opts->nr_cblocks = strtol(str, NULL, 0);
> +               if (!opts->nr_cblocks)
> +                       opts->nr_cblocks = nr_cblocks_default;
> +       }
>
>         return 0;
>  }
>  #else /* HAVE_AIO_SUPPORT */
> -static void record__aio_sync(struct perf_mmap *md __maybe_unused)
> +static int record__aio_sync(struct perf_mmap *md __maybe_unused, bool 
> sync_all __maybe_unused)
>  {
> +       return -1;
>  }
>
>  static int record__aio_pushfn(void *to __maybe_unused, struct aiocb *cblock 
> __maybe_unused,
> @@ -723,12 +747,13 @@ static int record__mmap_read_evlist(struct record *rec, 
> struct perf_evlist *evli
>                                         goto out;
>                                 }
>                         } else {
> +                               int idx;
>                                 /*
>                                  * Call record__aio_sync() to wait till 
> map->data buffer
>                                  * becomes available after previous aio write 
> request.
>                                  */
> -                               record__aio_sync(map);
> -                               if (perf_mmap__aio_push(map, rec, 
> record__aio_pushfn, &off) != 0) {
> +                               idx = record__aio_sync(map, false);
> +                               if (perf_mmap__aio_push(map, rec, idx, 
> record__aio_pushfn, &off) != 0) {
>                                         record__aio_set_pos(trace_fd, off);
>                                         rc = -1;
>                                         goto out;
> @@ -1492,6 +1517,13 @@ static int perf_record_config(const char *var, const 
> char *value, void *cb)
>                 var = "call-graph.record-mode";
>                 return perf_default_config(var, value, cb);
>         }
> +#ifdef HAVE_AIO_SUPPORT
> +       if (!strcmp(var, "record.aio")) {
> +               rec->opts.nr_cblocks = strtol(value, NULL, 0);
> +               if (!rec->opts.nr_cblocks)
> +                       rec->opts.nr_cblocks = nr_cblocks_default;
> +       }
> +#endif
>
>         return 0;
>  }
> @@ -1884,8 +1916,8 @@ static struct option __record_options[] = {
>         OPT_BOOLEAN(0, "dry-run", &dry_run,
>                     "Parse options then exit"),
>  #ifdef HAVE_AIO_SUPPORT
> -       OPT_CALLBACK_NOOPT(0, "aio", &record.opts,
> -                    NULL, "Enable asynchronous trace writing mode",
> +       OPT_CALLBACK_OPTARG(0, "aio", &record.opts,
> +                    &nr_cblocks_default, "n", "Use <n> control blocks in 
> asynchronous trace writing mode (default: 1, max: 4)",
>                      record__aio_parse),
>  #endif
>         OPT_END()
> @@ -2080,6 +2112,8 @@ int cmd_record(int argc, const char **argv)
>                 goto out;
>         }
>
> +       if (rec->opts.nr_cblocks > 4)
> +               rec->opts.nr_cblocks = 4;
nit: I feel this logic belongs to record__aio_parse(). And we should have

static int nr_cblocks_max = 4;

Other than this

Acked-by: Song Liu <songliubrav...@fb.com>

Reply via email to