From: Kan Liang <kan.li...@intel.com>

perf_top__mmap_read has a severe performance issue on
Knights Landing/Mill when monitoring a heavily loaded system. It takes
several minutes to finish, which is unacceptable.

perf top used to run in overwrite mode, but it was changed to non-overwrite
mode by commit 93fc64f14472 ("perf top: Switch to non overwrite mode").
In non-overwrite mode, it tries to read everything in the ring buffer
and does not check whether the data has been messed up. When lots of
samples are delivered in a short time, the processing time can be very long.
Knights Landing/Mill, as a manycore processor, contains a large number of
small cores. Because of the huge core count, it generates lots of
samples on a heavily loaded system. Also, because of the huge number of
samples, the mmap writer will probably bite the tail and mess up the samples.

Also, to avoid the problems which are described in
commit 9ecda41acb97 ("perf/core: Add ::write_backward attribute to perf
event"), switch to backward overwrite mode.
Pause the ring buffer during perf_top__mmap_read to ensure that the
ring buffer is stable while it is being read.
Some records will be lost in backward overwrite mode, so remove
the lost-events check.

Signed-off-by: Kan Liang <kan.li...@intel.com>
---
 tools/perf/builtin-top.c       | 21 +++++++++------------
 tools/perf/ui/browsers/hists.c | 12 +++++++++---
 2 files changed, 18 insertions(+), 15 deletions(-)

diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 477a869..03090d0 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -276,16 +276,6 @@ static void perf_top__print_sym_table(struct perf_top *top)
 
        printf("%-*.*s\n", win_width, win_width, graph_dotted_line);
 
-       if (hists->stats.nr_lost_warned !=
-           hists->stats.nr_events[PERF_RECORD_LOST]) {
-               hists->stats.nr_lost_warned =
-                             hists->stats.nr_events[PERF_RECORD_LOST];
-               color_fprintf(stdout, PERF_COLOR_RED,
-                             "WARNING: LOST %d chunks, Check IO/CPU overload",
-                             hists->stats.nr_lost_warned);
-               ++printed;
-       }
-
        if (top->sym_filter_entry) {
                perf_top__show_details(top);
                return;
@@ -802,6 +792,7 @@ static void perf_event__process_sample(struct perf_tool 
*tool,
 
 static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
 {
+       struct perf_mmap *md = &top->evlist->backward_mmap[idx];
        struct perf_sample sample;
        struct perf_evsel *evsel;
        struct perf_session *session = top->session;
@@ -809,7 +800,9 @@ static void perf_top__mmap_read_idx(struct perf_top *top, 
int idx)
        struct machine *machine;
        int ret;
 
-       while ((event = perf_evlist__mmap_read(top->evlist, idx)) != NULL) {
+       perf_mmap__read_catchup(md);
+
+       while ((event = perf_mmap__read_backward(md)) != NULL) {
                ret = perf_evlist__parse_sample(top->evlist, event, &sample);
                if (ret) {
                        pr_err("Can't parse sample, err = %d\n", ret);
@@ -872,8 +865,11 @@ static void perf_top__mmap_read(struct perf_top *top)
 {
        int i;
 
+       perf_evlist__toggle_bkw_mmap(top->evlist, BKW_MMAP_DATA_PENDING);
        for (i = 0; i < top->evlist->nr_mmaps; i++)
                perf_top__mmap_read_idx(top, i);
+       perf_evlist__toggle_bkw_mmap(top->evlist, BKW_MMAP_EMPTY);
+       perf_evlist__toggle_bkw_mmap(top->evlist, BKW_MMAP_RUNNING);
 }
 
 static int perf_top__start_counters(struct perf_top *top)
@@ -902,7 +898,7 @@ static int perf_top__start_counters(struct perf_top *top)
                }
        }
 
-       if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
+       if (perf_evlist__mmap(evlist, opts->mmap_pages, opts->overwrite) < 0) {
                ui__error("Failed to mmap with %d (%s)\n",
                            errno, str_error_r(errno, msg, sizeof(msg)));
                goto out_err;
@@ -1117,6 +1113,7 @@ int cmd_top(int argc, const char **argv)
                                .uses_mmap   = true,
                        },
                        .proc_map_timeout    = 500,
+                       .overwrite      = 1,
                },
                .max_stack           = sysctl_perf_event_max_stack,
                .sym_pcnt_filter     = 5,
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 13dfb0a..7419c05 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -712,8 +712,13 @@ int hist_browser__run(struct hist_browser *browser, const 
char *help)
                        nr_entries = hist_browser__nr_entries(browser);
                        ui_browser__update_nr_entries(&browser->b, nr_entries);
 
-                       if (browser->hists->stats.nr_lost_warned !=
-                           browser->hists->stats.nr_events[PERF_RECORD_LOST]) {
+                       /*
+                        * Don't print lost events warning for perf top,
+                        * because it is backward overwrite mode.
+                        * Perf top is the only tool which has hbt timer.
+                        */
+                       if ((browser->hists->stats.nr_lost_warned !=
+                           browser->hists->stats.nr_events[PERF_RECORD_LOST]) 
&& !hbt) {
                                browser->hists->stats.nr_lost_warned =
                                        
browser->hists->stats.nr_events[PERF_RECORD_LOST];
                                ui_browser__warn_lost_events(&browser->b);
@@ -3358,7 +3363,8 @@ static int perf_evsel_menu__run(struct perf_evsel_menu 
*menu,
                case K_TIMER:
                        hbt->timer(hbt->arg);
 
-                       if (!menu->lost_events_warned && menu->lost_events) {
+                       if (!menu->lost_events_warned && menu->lost_events &&
+                           !hbt) {
                                ui_browser__warn_lost_events(&menu->b);
                                menu->lost_events_warned = true;
                        }
-- 
2.5.5

Reply via email to