Currently 'perf sched record' commandsrecords various task stat info but
other perf sched commands don't use them.  This is a very simple
implementation of showing those info to users.

  $ sudo perf sched record -a sleep 1
  [ perf record: Woken up 1 times to write data ]
  [ perf record: Captured and wrote 0.346 MB perf.data (774 samples) ]

  $ sudo perf sched runtime | head
    TID:COMM                  Run-time    Sleep-time     Wait-time   
IOWait-time  Switch  Migrate
  ----------------------  ------------  ------------  ------------  
------------  ------  -------
   1347:Socket Thread         20901793    4895322293             0             
0       2        2
  14855:sshd                   7404420     660792968             0             
0       4        0
   1342:firefox                3657944     998405339         82358             
0      11        0
   1360:Timer                  1429014    1002239227             0             
0      11        0
    170:systemd-journal        1209607     644878917             0             
0       1        0
      7:rcu_preempt            1081471     958866004        416514             
0      26        6
  14856:sshd                    973632     506809918             0             
0       6        0
  14859:sleep                   887558    1030418783          4055             
0       3        1

Cc: David Ahern <dsah...@gmail.com>
Signed-off-by: Namhyung Kim <namhy...@kernel.org>
---
 tools/perf/builtin-sched.c | 361 ++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 359 insertions(+), 2 deletions(-)

diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 33962612a5e9..112596121242 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -98,6 +98,18 @@ struct work_atoms {
        int                     num_merged;
 };
 
+struct task_runtime {
+       struct list_head        list;
+       struct rb_node          node;
+       struct thread           *thread;
+       u64                     run_time;
+       u64                     sleep_time;
+       u64                     wait_time;
+       u64                     iowait_time;
+       int                     num_switch;
+       int                     num_migrate;
+};
+
 typedef int (*sort_fn_t)(struct work_atoms *, struct work_atoms *);
 
 struct perf_sched;
@@ -109,6 +121,15 @@ struct trace_sched_handler {
        int (*runtime_event)(struct perf_sched *sched, struct perf_evsel *evsel,
                             struct perf_sample *sample, struct machine 
*machine);
 
+       int (*sleep_event)(struct perf_sched *sched, struct perf_evsel *evsel,
+                          struct perf_sample *sample, struct machine *machine);
+
+       int (*wait_event)(struct perf_sched *sched, struct perf_evsel *evsel,
+                         struct perf_sample *sample, struct machine *machine);
+
+       int (*iowait_event)(struct perf_sched *sched, struct perf_evsel *evsel,
+                           struct perf_sample *sample, struct machine 
*machine);
+
        int (*wakeup_event)(struct perf_sched *sched, struct perf_evsel *evsel,
                            struct perf_sample *sample, struct machine 
*machine);
 
@@ -170,7 +191,7 @@ struct perf_sched {
        u64              all_count;
        u64              cpu_last_switched[MAX_CPUS];
        struct rb_root   atom_root, sorted_atom_root, merged_atom_root;
-       struct list_head sort_list, cmp_pid;
+       struct list_head sort_list, cmp_pid, runtime_list;
        bool force;
        bool skip_merge;
 };
@@ -1424,6 +1445,120 @@ static int map_switch_event(struct perf_sched *sched, 
struct perf_evsel *evsel,
        return 0;
 }
 
+static struct task_runtime *get_task_runtime(struct perf_sched *sched,
+                                            struct machine *machine, int pid)
+{
+       struct thread *th;
+       struct task_runtime *tr;
+
+       th = machine__findnew_thread(machine, -1, pid);
+       if (th == NULL)
+               return NULL;
+
+       tr = thread__priv(th);
+       if (tr == NULL) {
+               tr = zalloc(sizeof(*tr));
+               if (tr == NULL)
+                       return NULL;
+
+               thread__set_priv(th, tr);
+
+               /* hold a refcnt not to free the thread */
+               tr->thread = thread__get(th);
+
+               list_add(&tr->list, &sched->runtime_list);
+       }
+       return tr;
+}
+
+static int runtime_switch_event(struct perf_sched *sched, struct perf_evsel 
*evsel,
+                               struct perf_sample *sample, struct machine 
*machine)
+{
+       const u32 pid = perf_evsel__intval(evsel, sample, "prev_pid");
+       struct task_runtime *tr;
+
+       tr = get_task_runtime(sched, machine, pid);
+       if (tr == NULL)
+               return -1;
+
+       tr->num_switch++;
+       return 0;
+}
+
+static int runtime_runtime_event(struct perf_sched *sched, struct perf_evsel 
*evsel,
+                                struct perf_sample *sample, struct machine 
*machine)
+{
+       const u32 pid = perf_evsel__intval(evsel, sample, "pid");
+       const u64 time = perf_evsel__intval(evsel, sample, "runtime");
+       struct task_runtime *tr;
+
+       tr = get_task_runtime(sched, machine, pid);
+       if (tr == NULL)
+               return -1;
+
+       tr->run_time += time;
+       return 0;
+}
+
+static int runtime_sleep_event(struct perf_sched *sched, struct perf_evsel 
*evsel,
+                              struct perf_sample *sample, struct machine 
*machine)
+{
+       const u32 pid = perf_evsel__intval(evsel, sample, "pid");
+       const u64 time = perf_evsel__intval(evsel, sample, "delay");
+       struct task_runtime *tr;
+
+       tr = get_task_runtime(sched, machine, pid);
+       if (tr == NULL)
+               return -1;
+
+       tr->sleep_time += time;
+       return 0;
+}
+
+static int runtime_wait_event(struct perf_sched *sched, struct perf_evsel 
*evsel,
+                             struct perf_sample *sample, struct machine 
*machine)
+{
+       const u32 pid = perf_evsel__intval(evsel, sample, "pid");
+       const u64 time = perf_evsel__intval(evsel, sample, "delay");
+       struct task_runtime *tr;
+
+       tr = get_task_runtime(sched, machine, pid);
+       if (tr == NULL)
+               return -1;
+
+       tr->wait_time += time;
+       return 0;
+}
+
+static int runtime_iowait_event(struct perf_sched *sched, struct perf_evsel 
*evsel,
+                               struct perf_sample *sample, struct machine 
*machine)
+{
+       const u32 pid = perf_evsel__intval(evsel, sample, "pid");
+       const u64 time = perf_evsel__intval(evsel, sample, "delay");
+       struct task_runtime *tr;
+
+       tr = get_task_runtime(sched, machine, pid);
+       if (tr == NULL)
+               return -1;
+
+       tr->iowait_time += time;
+       return 0;
+}
+
+static int runtime_migrate_event(struct perf_sched *sched, struct perf_evsel 
*evsel,
+                                struct perf_sample *sample, struct machine 
*machine)
+{
+       const u32 pid = perf_evsel__intval(evsel, sample, "pid");
+       struct task_runtime *tr;
+
+       tr = get_task_runtime(sched, machine, pid);
+       if (tr == NULL)
+               return -1;
+
+       tr->num_migrate++;
+       return 0;
+}
+
 static int process_sched_switch_event(struct perf_tool *tool,
                                      struct perf_evsel *evsel,
                                      struct perf_sample *sample,
@@ -1463,6 +1598,45 @@ static int process_sched_runtime_event(struct perf_tool 
*tool,
        return 0;
 }
 
+static int process_sched_sleep_event(struct perf_tool *tool,
+                                    struct perf_evsel *evsel,
+                                    struct perf_sample *sample,
+                                    struct machine *machine)
+{
+       struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
+
+       if (sched->tp_handler->sleep_event)
+               return sched->tp_handler->sleep_event(sched, evsel, sample, 
machine);
+
+       return 0;
+}
+
+static int process_sched_wait_event(struct perf_tool *tool,
+                                   struct perf_evsel *evsel,
+                                   struct perf_sample *sample,
+                                   struct machine *machine)
+{
+       struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
+
+       if (sched->tp_handler->wait_event)
+               return sched->tp_handler->wait_event(sched, evsel, sample, 
machine);
+
+       return 0;
+}
+
+static int process_sched_iowait_event(struct perf_tool *tool,
+                                     struct perf_evsel *evsel,
+                                     struct perf_sample *sample,
+                                     struct machine *machine)
+{
+       struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
+
+       if (sched->tp_handler->iowait_event)
+               return sched->tp_handler->iowait_event(sched, evsel, sample, 
machine);
+
+       return 0;
+}
+
 static int perf_sched__process_fork_event(struct perf_tool *tool,
                                          union perf_event *event,
                                          struct perf_sample *sample,
@@ -1560,6 +1734,143 @@ out_delete:
        return rc;
 }
 
+static int perf_sched__read_runtime_events(struct perf_sched *sched)
+{
+       const struct perf_evsel_str_handler handlers[] = {
+               { "sched:sched_switch",       process_sched_switch_event, },
+               { "sched:sched_stat_runtime", process_sched_runtime_event, },
+               { "sched:sched_stat_sleep",   process_sched_sleep_event, },
+               { "sched:sched_stat_wait",    process_sched_wait_event, },
+               { "sched:sched_stat_iowait",  process_sched_iowait_event, },
+               { "sched:sched_migrate_task", process_sched_migrate_task_event, 
},
+       };
+       struct perf_session *session;
+       struct perf_data_file file = {
+               .path = input_name,
+               .mode = PERF_DATA_MODE_READ,
+               .force = sched->force,
+       };
+       int rc = -1;
+
+       session = perf_session__new(&file, false, &sched->tool);
+       if (session == NULL) {
+               pr_debug("No Memory for session\n");
+               return -1;
+       }
+
+       symbol__init(&session->header.env);
+
+       if (perf_session__set_tracepoints_handlers(session, handlers))
+               goto out_delete;
+
+       if (perf_session__has_traces(session, "record -R")) {
+               int err = perf_session__process_events(session);
+               if (err) {
+                       pr_err("Failed to process events, error %d", err);
+                       goto out_delete;
+               }
+
+               sched->nr_events      = session->evlist->stats.nr_events[0];
+               sched->nr_lost_events = session->evlist->stats.total_lost;
+               sched->nr_lost_chunks = 
session->evlist->stats.nr_events[PERF_RECORD_LOST];
+       }
+
+       rc = 0;
+out_delete:
+       perf_session__delete(session);
+       return rc;
+}
+
+static int task_runtime_cmp(struct perf_sched *sched __maybe_unused,
+                           struct task_runtime *a, struct task_runtime *b)
+{
+       return b->run_time - a->run_time;
+}
+
+static void sort_task_runtime(struct perf_sched *sched, struct task_runtime 
*tr)
+{
+       struct rb_root *root = &sched->sorted_atom_root;
+       struct rb_node **p = &root->rb_node;
+       struct rb_node *parent = NULL;
+
+       while (*p) {
+               struct task_runtime *iter;
+               int cmp;
+
+               parent = *p;
+               iter = rb_entry(parent, struct task_runtime, node);
+
+               cmp = task_runtime_cmp(sched, iter, tr);
+
+               if (cmp > 0)
+                       p = &parent->rb_left;
+               else
+                       p = &parent->rb_right;
+       }
+
+       rb_link_node(&tr->node, parent, p);
+       rb_insert_color(&tr->node, root);
+}
+
+static void perf_sched__sort_runtime(struct perf_sched *sched)
+{
+       while (!list_empty(&sched->runtime_list)) {
+               struct task_runtime *tr;
+
+               tr = list_first_entry(&sched->runtime_list,
+                                     struct task_runtime, list);
+               list_del(&tr->list);
+
+               sort_task_runtime(sched, tr);
+       }
+}
+
+static void print_task_runtime(struct task_runtime *tr)
+{
+       struct thread *th = tr->thread;
+
+       /*      tid:comm   run_time     sleep_time   wait_time    iowait_time  
switch  migrate*/
+       printf("%5d:%-16s  %12"PRIu64"  %12"PRIu64"  %12"PRIu64"  %12"PRIu64"  
%6d  %7d\n",
+              th->tid, thread__comm_str(th), tr->run_time, tr->sleep_time,
+              tr->wait_time, tr->iowait_time, tr->num_switch, tr->num_migrate);
+}
+
+static void perf_sched__print_runtime(struct perf_sched *sched)
+{
+       struct task_runtime *tr;
+       struct rb_root *root = &sched->sorted_atom_root;
+       struct rb_node *node = rb_first(root);
+
+       printf("%5s:%-16s  %12s  %12s  %12s  %12s  %6s  %7s\n",
+              "TID", "COMM", "Run-time", "Sleep-time", "Wait-time",
+              "IOWait-time", "Switch", "Migrate");
+       printf("%.22s  %.12s  %.12s  %.12s  %.12s  %.6s  %.7s\n",
+              graph_dotted_line, graph_dotted_line, graph_dotted_line,
+              graph_dotted_line, graph_dotted_line, graph_dotted_line,
+              graph_dotted_line);
+
+       while (node) {
+               tr = rb_entry(node, struct task_runtime, node);
+               print_task_runtime(tr);
+
+               node = rb_next(node);
+       }
+
+       /* release thread refcnt */
+       node = rb_first(root);
+       while (node) {
+               struct rb_node *next = rb_next(node);
+               rb_erase(node, root);
+
+               tr = rb_entry(node, struct task_runtime, node);
+               thread__set_priv(tr->thread, NULL);
+               thread__put(tr->thread);
+               free(tr);
+
+               node = next;
+       }
+}
+
 static void print_bad_events(struct perf_sched *sched)
 {
        if (sched->nr_unordered_timestamps && sched->nr_timestamps) {
@@ -1720,6 +2031,23 @@ static int perf_sched__replay(struct perf_sched *sched)
        return 0;
 }
 
+static int perf_sched__runtime(struct perf_sched *sched)
+{
+       setup_pager();
+
+       INIT_LIST_HEAD(&sched->runtime_list);
+       if (perf_sched__read_runtime_events(sched))
+               return -1;
+
+       perf_sched__sort_runtime(sched);
+       perf_sched__print_runtime(sched);
+
+       print_bad_events(sched);
+       printf("\n");
+
+       return 0;
+}
+
 static void setup_sorting(struct perf_sched *sched, const struct option 
*options,
                          const char * const usage_msg[])
 {
@@ -1821,6 +2149,14 @@ int cmd_sched(int argc, const char **argv, const char 
*prefix __maybe_unused)
        OPT_BOOLEAN('f', "force", &sched.force, "don't complain, do it"),
        OPT_END()
        };
+       const struct option runtime_options[] = {
+       OPT_INCR('v', "verbose", &verbose,
+                   "be more verbose (show symbol address, etc)"),
+       OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
+                   "dump raw trace in ASCII"),
+       OPT_BOOLEAN('f', "force", &sched.force, "don't complain, do it"),
+       OPT_END()
+       };
        const struct option sched_options[] = {
        OPT_STRING('i', "input", &input_name, "file",
                    "input file name"),
@@ -1838,8 +2174,13 @@ int cmd_sched(int argc, const char **argv, const char 
*prefix __maybe_unused)
                "perf sched replay [<options>]",
                NULL
        };
+       const char * const runtime_usage[] = {
+               "perf sched runtime [<options>]",
+               NULL
+       };
        const char *const sched_subcommands[] = { "record", "latency", "map",
-                                                 "replay", "script", NULL };
+                                                 "replay", "script", "runtime",
+                                                 NULL };
        const char *sched_usage[] = {
                NULL,
                NULL
@@ -1858,6 +2199,14 @@ int cmd_sched(int argc, const char **argv, const char 
*prefix __maybe_unused)
                .switch_event       = replay_switch_event,
                .fork_event         = replay_fork_event,
        };
+       struct trace_sched_handler runtime_ops  = {
+               .switch_event       = runtime_switch_event,
+               .runtime_event      = runtime_runtime_event,
+               .sleep_event        = runtime_sleep_event,
+               .wait_event         = runtime_wait_event,
+               .iowait_event       = runtime_iowait_event,
+               .migrate_task_event = runtime_migrate_event,
+       };
        unsigned int i;
 
        for (i = 0; i < ARRAY_SIZE(sched.curr_pid); i++)
@@ -1897,6 +2246,14 @@ int cmd_sched(int argc, const char **argv, const char 
*prefix __maybe_unused)
                                usage_with_options(replay_usage, 
replay_options);
                }
                return perf_sched__replay(&sched);
+       } else if (!strncmp(argv[0], "run", 3)) {
+               sched.tp_handler = &runtime_ops;
+               if (argc) {
+                       argc = parse_options(argc, argv, runtime_options, 
runtime_usage, 0);
+                       if (argc)
+                               usage_with_options(runtime_usage, 
runtime_options);
+               }
+               return perf_sched__runtime(&sched);
        } else {
                usage_with_options(sched_usage, sched_options);
        }
-- 
2.5.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to