The new --index option creates an indexed data file which can be
processed by multiple threads in parallel.  It saves meta events and
sample data in separate files and then merges them along with an index table.

If there's an index table in the data file, the HEADER_DATA_INDEX
feature bit is set and session->header.index[0] will point to the meta
event area, and the rest point to sample data.  The layout looks like this:

        +---------------------+
        |     file header     |
        |---------------------|
        |                     |
        |    meta events[0] <-+--+
        |                     |  |
        |---------------------|  |
        |                     |  |
        |    sample data[1] <-+--+
        |                     |  |
        |---------------------|  |
        |                     |  |
        |    sample data[2] <-+--+
        |                     |  |
        |---------------------|  |
        |         ...         | ...
        |---------------------|  |
        |     feature data    |  |
        |   (contains index) -+--+
        +---------------------+

Signed-off-by: Namhyung Kim <namhy...@kernel.org>
---
 tools/perf/Documentation/perf-record.txt |   4 +
 tools/perf/builtin-record.c              | 172 ++++++++++++++++++++++++++++---
 tools/perf/perf.h                        |   1 +
 tools/perf/util/header.c                 |   2 +
 tools/perf/util/session.c                |   1 +
 5 files changed, 166 insertions(+), 14 deletions(-)

diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 280533ebf9df..7eac31f02f8c 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -267,6 +267,10 @@ AUX area tracing event. Optionally the number of bytes to capture per
 snapshot can be specified. In Snapshot Mode, trace data is captured only when
 signal SIGUSR2 is received.
 
+--index::
+Build an index table for sample data.  This will speed up perf report by
+parallel processing.
+
 SEE ALSO
 --------
 linkperf:perf-stat[1], linkperf:perf-list[1]
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 303116c9a38a..4ddf104f50ff 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -40,6 +40,7 @@ struct record {
        u64                     bytes_written;
        struct perf_data_file   file;
        struct auxtrace_record  *itr;
+       int                     *fds;
        struct perf_evlist      *evlist;
        struct perf_session     *session;
        const char              *progname;
@@ -49,9 +50,16 @@ struct record {
        long                    samples;
 };
 
-static int record__write(struct record *rec, void *bf, size_t size)
+static int record__write(struct record *rec, void *bf, size_t size, int idx)
 {
-       if (perf_data_file__write(rec->session->file, bf, size) < 0) {
+       int fd;
+
+       if (rec->fds && idx >= 0)
+               fd = rec->fds[idx];
+       else
+               fd = perf_data_file__fd(rec->session->file);
+
+       if (writen(fd, bf, size) < 0) {
                pr_err("failed to write perf data, error: %m\n");
                return -1;
        }
@@ -66,7 +74,7 @@ static int process_synthesized_event(struct perf_tool *tool,
                                     struct machine *machine __maybe_unused)
 {
        struct record *rec = container_of(tool, struct record, tool);
-       return record__write(rec, event, event->header.size);
+       return record__write(rec, event, event->header.size, -1);
 }
 
 static int record__mmap_read(struct record *rec, int idx)
@@ -91,7 +99,7 @@ static int record__mmap_read(struct record *rec, int idx)
                size = md->mask + 1 - (old & md->mask);
                old += size;
 
-               if (record__write(rec, buf, size) < 0) {
+               if (record__write(rec, buf, size, idx) < 0) {
                        rc = -1;
                        goto out;
                }
@@ -101,7 +109,7 @@ static int record__mmap_read(struct record *rec, int idx)
        size = head - old;
        old += size;
 
-       if (record__write(rec, buf, size) < 0) {
+       if (record__write(rec, buf, size, idx) < 0) {
                rc = -1;
                goto out;
        }
@@ -149,6 +157,7 @@ static int record__process_auxtrace(struct perf_tool *tool,
        struct perf_data_file *file = &rec->file;
        size_t padding;
        u8 pad[8] = {0};
+       int idx = event->auxtrace.idx;
 
        if (!perf_data_file__is_pipe(file)) {
                off_t file_offset;
@@ -169,11 +178,11 @@ static int record__process_auxtrace(struct perf_tool *tool,
        if (padding)
                padding = 8 - padding;
 
-       record__write(rec, event, event->header.size);
-       record__write(rec, data1, len1);
+       record__write(rec, event, event->header.size, idx);
+       record__write(rec, data1, len1, idx);
        if (len2)
-               record__write(rec, data2, len2);
-       record__write(rec, &pad, padding);
+               record__write(rec, data2, len2, idx);
+       record__write(rec, &pad, padding, idx);
 
        return 0;
 }
@@ -266,6 +275,110 @@ int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
 
 #endif
 
+#define INDEX_FILE_FMT  "%s.dir/perf.data.%d"
+
+/*
+ * Create one temporary sample file per mmap under "<output>.dir/" and keep
+ * the descriptors in rec->fds.  Returns 0 on success, negative on failure.
+ */
+static int record__create_index_files(struct record *rec, int nr_index)
+{
+       int i = 0, ret = -1;
+       char path[PATH_MAX];
+       struct perf_data_file *file = &rec->file;
+
+       rec->fds = malloc(nr_index * sizeof(int));
+       if (rec->fds == NULL)
+               return -ENOMEM;
+
+       scnprintf(path, sizeof(path), "%s.dir", file->path);
+       if (rm_rf(path) < 0 || mkdir(path, S_IRWXU) < 0)
+               goto out_err;
+
+       for (i = 0; i < nr_index; i++) {
+               scnprintf(path, sizeof(path), INDEX_FILE_FMT, file->path, i);
+               ret = open(path, O_RDWR|O_CREAT|O_TRUNC, S_IRUSR|S_IWUSR);
+               if (ret < 0)
+                       goto out_err;
+               rec->fds[i] = ret;
+       }
+       return 0;
+
+out_err:
+       while (--i >= 0)        /* close every fd opened so far, fds[0] too */
+               close(rec->fds[i]);
+       zfree(&rec->fds);
+       scnprintf(path, sizeof(path), "%s.dir", file->path);
+       rm_rf(path);
+       return ret;
+}
+
+/*
+ * Append each per-mmap sample file to the output file and record its location
+ * in the index table (idx[0] is the meta event area).  Temp files are removed.
+ */
+static int record__merge_index_files(struct record *rec, int nr_index)
+{
+       int i, ret = -ENOMEM;
+       u64 offset;
+       char path[PATH_MAX];
+       struct perf_file_section *idx;
+       struct perf_data_file *file = &rec->file;
+       struct perf_session *session = rec->session;
+       int output_fd = perf_data_file__fd(file);
+
+       /* +1 for header file itself */
+       nr_index++;
+
+       idx = calloc(nr_index, sizeof(*idx));
+       if (idx == NULL)
+               goto out_close;
+
+       offset = lseek(output_fd, 0, SEEK_END);
+       idx[0].offset = session->header.data_offset;
+       idx[0].size   = offset - idx[0].offset;
+
+       for (i = 1; i < nr_index; i++) {
+               struct stat stbuf;
+               int fd = rec->fds[i - 1];
+
+               ret = fstat(fd, &stbuf);
+               if (ret < 0)
+                       goto out_close;
+
+               idx[i].offset = offset;
+               idx[i].size   = stbuf.st_size;
+               offset += stbuf.st_size;
+
+               if (idx[i].size == 0)
+                       continue;
+
+               ret = copyfile_offset(fd, 0, output_fd, idx[i].offset,
+                                     idx[i].size);
+               if (ret < 0)
+                       goto out_close;
+       }
+
+       /* ownership of idx moves to the session header */
+       session->header.index = idx;
+       session->header.nr_index = nr_index;
+       perf_has_index = true;
+       ret = 0;
+
+out_close:
+       if (ret < 0) {
+               pr_err("failed to merge index files: %d\n", ret);
+               free(idx);      /* not handed to the session; don't leak it */
+       }
+       for (i = 0; i < nr_index - 1; i++)
+               close(rec->fds[i]);
+
+       scnprintf(path, sizeof(path), "%s.dir", file->path);
+       rm_rf(path);
+       zfree(&rec->fds);
+       return ret;
+}
+
 static int record__open(struct record *rec)
 {
        char msg[512];
@@ -304,7 +417,8 @@ static int record__open(struct record *rec)
 
        if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, false,
                                 opts->auxtrace_mmap_pages,
-                                opts->auxtrace_snapshot_mode, false) < 0) {
+                                opts->auxtrace_snapshot_mode,
+                                opts->index) < 0) {
                if (errno == EPERM) {
                        pr_err("Permission error mapping pages.\n"
                               "Consider increasing "
@@ -321,6 +435,14 @@ static int record__open(struct record *rec)
                goto out;
        }
 
+       if (opts->index) {
+               rc = record__create_index_files(rec, evlist->nr_mmaps);
+               if (rc < 0) {
+                       pr_err("failed to create index file: %d\n", rc);
+                       goto out;
+               }
+       }
+
        session->evlist = evlist;
        perf_session__set_id_hdr_size(session);
 out:
@@ -345,7 +467,8 @@ static int process_buildids(struct record *rec)
        struct perf_data_file *file  = &rec->file;
        struct perf_session *session = rec->session;
 
-       u64 size = lseek(perf_data_file__fd(file), 0, SEEK_CUR);
+       /* update file size after merging sample files with index */
+       u64 size = lseek(perf_data_file__fd(file), 0, SEEK_END);
        if (size == 0)
                return 0;
 
@@ -415,7 +538,7 @@ static int record__mmap_read_all(struct record *rec)
                        }
                }
 
-               if (rec->evlist->track_mmap[i].base) {
+               if (rec->evlist->track_mmap && rec->evlist->track_mmap[i].base) {
                        if (record__mmap_read(rec, track_mmap_idx(i)) != 0) {
                                rc = -1;
                                goto out;
@@ -434,7 +557,8 @@ static int record__mmap_read_all(struct record *rec)
         * at least one event.
         */
        if (bytes_written != rec->bytes_written)
-               rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));
+               rc = record__write(rec, &finished_round_event,
+                                  sizeof(finished_round_event), -1);
 
 out:
        return rc;
@@ -460,7 +584,8 @@ static void record__init_features(struct record *rec)
        if (!rec->opts.full_auxtrace)
                perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
 
-       perf_header__clear_feat(&session->header, HEADER_DATA_INDEX);
+       if (!rec->opts.index)
+               perf_header__clear_feat(&session->header, HEADER_DATA_INDEX);
 }
 
 static volatile int workload_exec_errno;
@@ -528,6 +653,11 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
                }
        }
 
+       if (file->is_pipe && opts->index) {
+               pr_warning("Indexing is disabled for pipe output\n");
+               opts->index = false;
+       }
+
        if (record__open(rec) != 0) {
                err = -1;
                goto out_child;
@@ -729,6 +859,9 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
        if (!err && !file->is_pipe) {
                rec->session->header.data_size += rec->bytes_written;
 
+               if (rec->opts.index)
+                       record__merge_index_files(rec, rec->evlist->nr_mmaps);
+
                if (!rec->no_buildid) {
                        process_buildids(rec);
                        /*
@@ -1166,6 +1299,8 @@ struct option __record_options[] = {
        parse_clockid),
        OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
                          "opts", "AUX area tracing Snapshot Mode", ""),
+       OPT_BOOLEAN(0, "index", &record.opts.index,
+                   "make index for sample data to speed-up processing"),
        OPT_END()
 };
 
@@ -1228,6 +1363,15 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
                goto out_symbol_exit;
        }
 
+       if (rec->opts.index) {
+               if (!rec->opts.sample_time) {
+                       pr_err("Sample timestamp is required for indexing\n");
+                       goto out_symbol_exit;
+               }
+
+               perf_evlist__add_dummy_tracking(rec->evlist);
+       }
+
        if (rec->opts.target.tid && !rec->opts.no_inherit_set)
                rec->opts.no_inherit = true;
 
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 61ce68d5c59f..192d936020ea 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -58,6 +58,7 @@ struct record_opts {
        bool         running_time;
        bool         full_auxtrace;
        bool         auxtrace_snapshot_mode;
+       bool         index;
        unsigned int freq;
        unsigned int mmap_pages;
        unsigned int auxtrace_mmap_pages;
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 1e10f4bc664d..796a958d951a 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -2652,6 +2652,8 @@ int perf_session__read_header(struct perf_session *session)
                                                   session->tevent.pevent))
                goto out_delete_evlist;
 
+       perf_has_index = perf_header__has_feat(&session->header, HEADER_DATA_INDEX);
+
        return 0;
 out_errno:
        return -errno;
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index edb6ca22bff2..bc738216de36 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -195,6 +195,7 @@ void perf_session__delete(struct perf_session *session)
        machines__exit(&session->machines);
        if (session->file)
                perf_data_file__close(session->file);
+       free(session->header.index);
        free(session);
 }
 
-- 
2.4.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to