perf exceeds the MAX_NR_CPUS and MAX_CACHES limits when attempting to profile a system with 1024 or more CPUs. Raise each limit and/or make it dynamic to restore normal functionality.
Before: perf record -a [ perf record: Woken up X times to write data ] way too many cpu caches.. [ perf record: Captured and wrote X MB perf.data (X samples) ] perf report -C 1024 Error: failed to set cpu bitmap Requested CPU 1024 too large. Consider raising MAX_NR_CPUS After: perf record -a [ perf record: Woken up X times to write data ] [ perf record: Captured and wrote X MB perf.data (X samples) ] perf report -C 1024 ... The variables nr_cpus_onln and max_caches are alternatives to MAX_NR_CPUS and MAX_CACHES; they are initialized at runtime. MAX_NR_CPUS is increased from 1024 to 2048 because it is still used by DECLARE_BITMAP() at compile time; nr_cpus_onln replaces it elsewhere throughout perf. Cc: Peter Zijlstra <pet...@infradead.org> Cc: Ingo Molnar <mi...@redhat.com> Cc: Arnaldo Carvalho de Melo <a...@kernel.org> Cc: Alexander Shishkin <alexander.shish...@linux.intel.com> Cc: Jiri Olsa <jo...@redhat.com> Cc: Namhyung Kim <namhy...@kernel.org> Cc: linux-kernel@vger.kernel.org Cc: Russ Anderson <russ.ander...@hpe.com> Signed-off-by: Kyle Meyer <kyle.me...@hpe.com> --- tools/perf/perf.c | 6 ++++++ tools/perf/perf.h | 3 ++- tools/perf/util/cpumap.c | 6 +++--- tools/perf/util/header.c | 7 +++---- tools/perf/util/machine.c | 11 +++++------ tools/perf/util/session.c | 5 ++--- tools/perf/util/stat.c | 4 ++-- tools/perf/util/svghelper.c | 10 +++++----- 8 files changed, 28 insertions(+), 24 deletions(-) diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 72df4b6fa36f..c2c22476a65f 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -427,6 +427,12 @@ int main(int argc, const char **argv) const char *cmd; char sbuf[STRERR_BUFSIZE]; + nr_cpus_onln = sysconf(_SC_NPROCESSORS_ONLN); + if (nr_cpus_onln < 0) { + fprintf(stderr, "Cannot determine the number of CPUs currently online.\n"); + goto out; + } + /* libsubcmd init */ exec_cmd_init("perf", PREFIX, PERF_EXEC_PATH, EXEC_PATH_ENVIRONMENT); pager_init(PERF_PAGER_ENVIRONMENT); diff --git a/tools/perf/perf.h 
b/tools/perf/perf.h index 711e009381ec..603391cac85b 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -26,9 +26,10 @@ static inline unsigned long long rdclock(void) } #ifndef MAX_NR_CPUS -#define MAX_NR_CPUS 1024 +#define MAX_NR_CPUS 2048 #endif +int nr_cpus_onln; extern const char *input_name; extern bool perf_host, perf_guest; extern const char perf_version_string[]; diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index c11a459ca582..83c05afef063 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -72,7 +72,7 @@ struct cpu_map *cpu_map__read(FILE *file) int new_max = nr_cpus + cpu - prev - 1; if (new_max >= max_entries) { - max_entries = new_max + MAX_NR_CPUS / 2; + max_entries = new_max + nr_cpus_onln / 2; tmp = realloc(tmp_cpus, max_entries * sizeof(int)); if (tmp == NULL) goto out_free_tmp; @@ -83,7 +83,7 @@ struct cpu_map *cpu_map__read(FILE *file) tmp_cpus[nr_cpus++] = prev; } if (nr_cpus == max_entries) { - max_entries += MAX_NR_CPUS; + max_entries += nr_cpus_onln; tmp = realloc(tmp_cpus, max_entries * sizeof(int)); if (tmp == NULL) goto out_free_tmp; @@ -170,7 +170,7 @@ struct cpu_map *cpu_map__new(const char *cpu_list) goto invalid; if (nr_cpus == max_entries) { - max_entries += MAX_NR_CPUS; + max_entries += nr_cpus_onln; tmp = realloc(tmp_cpus, max_entries * sizeof(int)); if (tmp == NULL) goto invalid; diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 06ddb6618ef3..78f1acb069ed 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1121,16 +1121,15 @@ static int build_caches(struct cpu_cache_level caches[], u32 size, u32 *cntp) return 0; } -#define MAX_CACHES 2000 - static int write_cache(struct feat_fd *ff, struct perf_evlist *evlist __maybe_unused) { - struct cpu_cache_level caches[MAX_CACHES]; + u32 max_caches = (nr_cpus_onln * 4); + struct cpu_cache_level caches[max_caches]; u32 cnt = 0, i, version = 1; int ret; - ret = build_caches(caches, MAX_CACHES, &cnt); + ret = 
build_caches(caches, max_caches, &cnt); if (ret) goto out; diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 17eec39e775e..b4d792dbeb1f 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -2593,7 +2593,7 @@ int __machine__synthesize_threads(struct machine *machine, struct perf_tool *too pid_t machine__get_current_tid(struct machine *machine, int cpu) { - if (cpu < 0 || cpu >= MAX_NR_CPUS || !machine->current_tid) + if (cpu < 0 || cpu >= nr_cpus_onln || !machine->current_tid) return -1; return machine->current_tid[cpu]; @@ -2610,16 +2610,15 @@ int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid, if (!machine->current_tid) { int i; - machine->current_tid = calloc(MAX_NR_CPUS, sizeof(pid_t)); + machine->current_tid = calloc(nr_cpus_onln, sizeof(pid_t)); if (!machine->current_tid) return -ENOMEM; - for (i = 0; i < MAX_NR_CPUS; i++) + for (i = 0; i < nr_cpus_onln; i++) machine->current_tid[i] = -1; } - if (cpu >= MAX_NR_CPUS) { - pr_err("Requested CPU %d too large. ", cpu); - pr_err("Consider raising MAX_NR_CPUS\n"); + if (cpu >= nr_cpus_onln) { + pr_err("Requested CPU %d too large, there are %d CPUs currently online.\n", cpu, nr_cpus_onln); return -EINVAL; } diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 54cf163347f7..8641364555fb 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -2289,9 +2289,8 @@ int perf_session__cpu_bitmap(struct perf_session *session, for (i = 0; i < map->nr; i++) { int cpu = map->map[i]; - if (cpu >= MAX_NR_CPUS) { - pr_err("Requested CPU %d too large. 
" - "Consider raising MAX_NR_CPUS\n", cpu); + if (cpu >= nr_cpus_onln) { + pr_err("Requested CPU %d too large, there are %d CPUs currently online.\n", cpu, nr_cpus_onln); goto out_delete_map; } diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index d91fe754b6d2..9d4a3b96496a 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -207,7 +207,7 @@ void perf_evlist__reset_stats(struct perf_evlist *evlist) static void zero_per_pkg(struct perf_evsel *counter) { if (counter->per_pkg_mask) - memset(counter->per_pkg_mask, 0, MAX_NR_CPUS); + memset(counter->per_pkg_mask, 0, nr_cpus_onln); } static int check_per_pkg(struct perf_evsel *counter, @@ -226,7 +226,7 @@ static int check_per_pkg(struct perf_evsel *counter, return 0; if (!mask) { - mask = zalloc(MAX_NR_CPUS); + mask = zalloc(nr_cpus_onln); if (!mask) return -ENOMEM; diff --git a/tools/perf/util/svghelper.c b/tools/perf/util/svghelper.c index fab8a048d31b..6a8241932dce 100644 --- a/tools/perf/util/svghelper.c +++ b/tools/perf/util/svghelper.c @@ -705,7 +705,7 @@ static void scan_thread_topology(int *map, struct topology *t, int cpu, int *pos for_each_set_bit(thr, cpumask_bits(&t->sib_thr[i]), - MAX_NR_CPUS) + nr_cpus_onln) if (map[thr] == -1) map[thr] = (*pos)++; } @@ -720,7 +720,7 @@ static void scan_core_topology(int *map, struct topology *t) for (i = 0; i < t->sib_core_nr; i++) for_each_set_bit(cpu, cpumask_bits(&t->sib_core[i]), - MAX_NR_CPUS) + nr_cpus_onln) scan_thread_topology(map, t, cpu, &pos); } @@ -737,7 +737,7 @@ static int str_to_bitmap(char *s, cpumask_t *b) for (i = 0; i < m->nr; i++) { c = m->map[i]; - if (c >= MAX_NR_CPUS) { + if (c >= nr_cpus_onln) { ret = -1; break; } @@ -784,13 +784,13 @@ int svg_build_topology_map(char *sib_core, int sib_core_nr, sib_thr += strlen(sib_thr) + 1; } - topology_map = malloc(sizeof(int) * MAX_NR_CPUS); + topology_map = malloc(sizeof(int) * nr_cpus_onln); if (!topology_map) { fprintf(stderr, "topology: no memory\n"); goto exit; } - for (i = 0; i 
< MAX_NR_CPUS; i++) + for (i = 0; i < nr_cpus_onln; i++) topology_map[i] = -1; scan_core_topology(topology_map, &t); -- 2.12.3