[PATCH v6 03/10] perf: Split perf_event_read() and perf_event_count()
perf_event_read() does two things: - call the PMU to read/update the counter value, and - compute the total count of the event and its children Not all callers need both. perf_event_reset() for instance needs the first piece but doesn't need the second. Similarly, when we implement the ability to read a group of events using the transaction interface, we would need the two pieces done independently. Break up perf_event_read() and have it just read/update the counter and have the callers compute the total count if necessary. Signed-off-by: Sukadev Bhattiprolu <suka...@linux.vnet.ibm.com> --- kernel/events/core.c | 14 -- 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index e221432..01ede6b 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3275,7 +3275,7 @@ u64 perf_event_read_local(struct perf_event *event) return val; } -static u64 perf_event_read(struct perf_event *event) +static void perf_event_read(struct perf_event *event) { /* * If event is enabled and currently active on a CPU, update the @@ -3301,8 +3301,6 @@ static u64 perf_event_read(struct perf_event *event) update_event_times(event); raw_spin_unlock_irqrestore(>lock, flags); } - - return perf_event_count(event); } /* @@ -3818,14 +3816,18 @@ u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) *running = 0; mutex_lock(>child_mutex); - total += perf_event_read(event); + + perf_event_read(event); + total += perf_event_count(event); + *enabled += event->total_time_enabled + atomic64_read(>child_total_time_enabled); *running += event->total_time_running + atomic64_read(>child_total_time_running); list_for_each_entry(child, >child_list, child_list) { - total += perf_event_read(child); + perf_event_read(child); + total += perf_event_count(child); *enabled += child->total_time_enabled; *running += child->total_time_running; } @@ -3985,7 +3987,7 @@ static unsigned int perf_poll(struct file *file, poll_table *wait) 
static void _perf_event_reset(struct perf_event *event) { - (void)perf_event_read(event); + perf_event_read(event); local64_set(&event->count, 0); perf_event_update_userpage(event); } -- 1.7.9.5 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v6 05/10] perf: Add group reads to perf_event_read()
From: Peter Zijlstra <pet...@infradead.org> Enable perf_event_read() to update entire groups at once, this will be useful for read transactions. Cc: Ingo Molnar <mi...@redhat.com> Cc: Arnaldo Carvalho de Melo <a...@kernel.org> Cc: Michael Ellerman <m...@ellerman.id.au> Cc: Sukadev Bhattiprolu <suka...@linux.vnet.ibm.com> Signed-off-by: Peter Zijlstra (Intel) <pet...@infradead.org> Link: http://lkml.kernel.org/r/20150723080435.ge25...@twins.programming.kicks-ass.net --- kernel/events/core.c | 39 --- 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index be39e63..7bb9141 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3184,12 +3184,18 @@ void perf_event_exec(void) rcu_read_unlock(); } +struct perf_read_data { + struct perf_event *event; + bool group; +}; + /* * Cross CPU call to read the hardware event */ static void __perf_event_read(void *info) { - struct perf_event *event = info; + struct perf_read_data *data = info; + struct perf_event *sub, *event = data->event; struct perf_event_context *ctx = event->ctx; struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); @@ -3208,9 +3214,21 @@ static void __perf_event_read(void *info) update_context_time(ctx); update_cgrp_time_from_event(event); } + update_event_times(event); if (event->state == PERF_EVENT_STATE_ACTIVE) event->pmu->read(event); + + if (!data->group) + goto unlock; + + list_for_each_entry(sub, >sibling_list, group_entry) { + update_event_times(sub); + if (sub->state == PERF_EVENT_STATE_ACTIVE) + sub->pmu->read(sub); + } + +unlock: raw_spin_unlock(>lock); } @@ -3275,15 +3293,19 @@ u64 perf_event_read_local(struct perf_event *event) return val; } -static void perf_event_read(struct perf_event *event) +static void perf_event_read(struct perf_event *event, bool group) { /* * If event is enabled and currently active on a CPU, update the * value in the event structure: */ if (event->state == PERF_EVENT_STATE_ACTIVE) { + struct 
perf_read_data data = { + .event = event, + .group = group, + }; smp_call_function_single(event->oncpu, -__perf_event_read, event, 1); +__perf_event_read, , 1); } else if (event->state == PERF_EVENT_STATE_INACTIVE) { struct perf_event_context *ctx = event->ctx; unsigned long flags; @@ -3298,7 +3320,10 @@ static void perf_event_read(struct perf_event *event) update_context_time(ctx); update_cgrp_time_from_event(event); } - update_event_times(event); + if (group) + update_group_times(event); + else + update_event_times(event); raw_spin_unlock_irqrestore(>lock, flags); } } @@ -3817,7 +3842,7 @@ u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) mutex_lock(>child_mutex); - perf_event_read(event); + perf_event_read(event, false); total += perf_event_count(event); *enabled += event->total_time_enabled + @@ -3826,7 +3851,7 @@ u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) atomic64_read(>child_total_time_running); list_for_each_entry(child, >child_list, child_list) { - perf_event_read(child); + perf_event_read(child, false); total += perf_event_count(child); *enabled += child->total_time_enabled; *running += child->total_time_running; @@ -3987,7 +4012,7 @@ static unsigned int perf_poll(struct file *file, poll_table *wait) static void _perf_event_reset(struct perf_event *event) { - perf_event_read(event); + perf_event_read(event, false); local64_set(>count, 0); perf_event_update_userpage(event); } -- 1.7.9.5 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v6 10/10] perf: Drop PERF_EVENT_TXN
We currently use PERF_EVENT_TXN flag to determine if we are in the middle of a transaction. If in a transaction, we defer the schedulability checks from pmu->add() operation to the pmu->commit() operation. Now that we have "transaction types" (PERF_PMU_TXN_ADD, PERF_PMU_TXN_READ) we can use the type to determine if we are in a transaction and drop the PERF_EVENT_TXN flag. When PERF_EVENT_TXN is dropped, the cpuhw->group_flag on some architectures becomes unused, so drop that field as well. This is an extension of the Powerpc patch from Peter Zijlstra to s390, Sparc and x86 architectures. Signed-off-by: Sukadev Bhattiprolu <suka...@linux.vnet.ibm.com> --- arch/powerpc/perf/core-book3s.c |6 +- arch/s390/kernel/perf_cpum_cf.c |5 + arch/sparc/kernel/perf_event.c |6 +- arch/x86/kernel/cpu/perf_event.c |7 ++- arch/x86/kernel/cpu/perf_event.h |1 - include/linux/perf_event.h |2 -- 6 files changed, 5 insertions(+), 22 deletions(-) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index a9e9a39..b699d19 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -48,7 +48,6 @@ struct cpu_hw_events { unsigned long amasks[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES]; unsigned long avalues[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES]; - unsigned int group_flag; unsigned int txn_flags; int n_txn_start; @@ -1442,7 +1441,7 @@ static int power_pmu_add(struct perf_event *event, int ef_flags) * skip the schedulability test here, it will be performed * at commit time(->commit_txn) as a whole */ - if (cpuhw->group_flag & PERF_EVENT_TXN) + if (cpuhw->txn_flags & PERF_PMU_TXN_ADD) goto nocheck; if (check_excludes(cpuhw->event, cpuhw->flags, n0, 1)) @@ -1603,7 +1602,6 @@ static void power_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags) return; perf_pmu_disable(pmu); - cpuhw->group_flag |= PERF_EVENT_TXN; cpuhw->n_txn_start = cpuhw->n_events; } @@ -1624,7 +1622,6 @@ static void power_pmu_cancel_txn(struct pmu *pmu) if (txn_flags & 
~PERF_PMU_TXN_ADD) return; - cpuhw->group_flag &= ~PERF_EVENT_TXN; perf_pmu_enable(pmu); } @@ -1659,7 +1656,6 @@ static int power_pmu_commit_txn(struct pmu *pmu) for (i = cpuhw->n_txn_start; i < n; ++i) cpuhw->event[i]->hw.config = cpuhw->events[i]; - cpuhw->group_flag &= ~PERF_EVENT_TXN; cpuhw->txn_flags = 0; perf_pmu_enable(pmu); return 0; diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index dbcfd29..f50ef65 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -536,7 +536,7 @@ static int cpumf_pmu_add(struct perf_event *event, int flags) * For group events transaction, the authorization check is * done in cpumf_pmu_commit_txn(). */ - if (!(cpuhw->flags & PERF_EVENT_TXN)) + if (!(cpuhw->txn_flags & PERF_PMU_TXN_ADD)) if (validate_ctr_auth(>hw)) return -EPERM; @@ -590,7 +590,6 @@ static void cpumf_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags) return; perf_pmu_disable(pmu); - cpuhw->flags |= PERF_EVENT_TXN; cpuhw->tx_state = cpuhw->state; } @@ -613,7 +612,6 @@ static void cpumf_pmu_cancel_txn(struct pmu *pmu) WARN_ON(cpuhw->tx_state != cpuhw->state); - cpuhw->flags &= ~PERF_EVENT_TXN; perf_pmu_enable(pmu); } @@ -640,7 +638,6 @@ static int cpumf_pmu_commit_txn(struct pmu *pmu) if ((state & cpuhw->info.auth_ctl) != state) return -EPERM; - cpuhw->flags &= ~PERF_EVENT_TXN; cpuhw->txn_flags = 0; perf_pmu_enable(pmu); return 0; diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 2c0984d..b0da5ae 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -108,7 +108,6 @@ struct cpu_hw_events { /* Enabled/disable state. 
*/ int enabled; - unsigned intgroup_flag; unsigned inttxn_flags; }; static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, }; @@ -1380,7 +1379,7 @@ static int sparc_pmu_add(struct perf_event *event, int ef_flags) * skip the schedulability test here, it will be performed * at commit time(->commit_txn) as a whole */ - if (cpuc->group_flag & PERF_EVENT_TXN) + if (cpuc->txn_flags & PERF_PMU_TXN_ADD) goto nocheck; if (check_excludes(cpuc->event, n0, 1)) @@ -1506,7 +1505,6 @@ stati
Re: [PATCH v5 1/8] perf: Add a flags parameter to pmu txn interfaces
Peter Zijlstra [pet...@infradead.org] wrote: | | when looking at this (I almost pressed A for apply) it occurred to me | that we now keep double state, cpuhw->txn_flags and cpuhw->group_flag | are basically the same thing. | | Would not something like the below avoid this duplication? Yes, it makes sense to drop the duplication. Will similarly drop the usage of PERF_EVENT_TXN from other architectures too and try it out. Thanks, Sukadev ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH v16 00/16] perf, tools: Add support for PMU events in JSON format
Sukadev Bhattiprolu [suka...@linux.vnet.ibm.com] wrote: | CPUs support a large number of performance monitoring events (PMU events) | and often these events are very specific to an architecture/model of the | CPU. To use most of these PMU events with perf, we currently have to identify | them by their raw codes: | | perf stat -e r100f2 sleep 1 | | This patchset allows architectures to specify these PMU events in JSON | files located in 'tools/perf/pmu-events/arch/' of the mainline tree. | The events from the JSON files for the architecture are then built into | the perf binary. | | At run time, perf identifies the specific set of events for the CPU and | creates "event aliases". These aliases allow users to specify events by | "name" as: | | perf stat -e pm_1plus_ppc_cmpl sleep 1 | | The file, 'tools/perf/pmu-events/README' in [PATCH 16/16] gives more | details. | | Note: | - All known events tables for the architecture are included in the | perf binary. | | - For architectures that don't have any JSON files, an empty mapping | table is created and they should continue to build. | | Thanks to input from Andi Kleen, Jiri Olsa, Namhyung Kim and Ingo Molnar. | | These patches are available from: | | https://github.com:sukadev/linux.git | | Branch Description | -- | json-v16Source Code only | json-files-6x86 and Powerpc datafiles only | json-v16-with-data Both code and data (for build/test) Arnaldo, Ingo, I added Andi's patch https://lkml.org/lkml/2015/8/28/521 to 'json-v16' branch. and created a new branch, 'json-files-7' with updated to Intel data files. Here are the three new branches. https://github.com:sukadev/linux.git Branch Description -- json-v16Source Code only json-files-7x86 and Powerpc datafiles only json-v16.1-with-dataBoth code and data (for build/test) Sukadev ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v16 04/16] perf, tools: Support CPU ID matching for Powerpc
Implement code that returns the generic CPU ID string for Powerpc. This will be used to identify the specific table of PMU events to parse/compare user specified events against. Signed-off-by: Sukadev Bhattiprolu <suka...@linux.vnet.ibm.com> Acked-by: Jiri Olsa <jo...@redhat.com> --- Changelog[v14] - [Jiri Olsa] Move this independent code off into a separate patch. --- tools/perf/arch/powerpc/util/header.c | 11 +++ 1 file changed, 11 insertions(+) diff --git a/tools/perf/arch/powerpc/util/header.c b/tools/perf/arch/powerpc/util/header.c index 6c1b8a7..65f9391 100644 --- a/tools/perf/arch/powerpc/util/header.c +++ b/tools/perf/arch/powerpc/util/header.c @@ -32,3 +32,14 @@ get_cpuid(char *buffer, size_t sz) } return -1; } + +char * +get_cpuid_str(void) +{ + char *bufp; + + if (asprintf(&bufp, "%.8lx", mfspr(SPRN_PVR)) < 0) + bufp = NULL; + + return bufp; +} -- 1.7.9.5 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v16 07/16] perf, tools: Query terminal width and use in perf list
From: Andi Kleen a...@linux.intel.com Automatically adapt the now wider and word wrapped perf list output to wider terminals. This requires querying the terminal before the auto pager takes over, and exporting this information from the pager subsystem. Signed-off-by: Andi Kleen a...@linux.intel.com Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com Acked-by: Namhyung Kim namhy...@kernel.org Acked-by: Jiri Olsa jo...@redhat.com --- tools/perf/util/cache.h |1 + tools/perf/util/pager.c | 15 +++ tools/perf/util/pmu.c |3 ++- 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h index c861373..8e0d4b8 100644 --- a/tools/perf/util/cache.h +++ b/tools/perf/util/cache.h @@ -32,6 +32,7 @@ extern const char *perf_config_dirname(const char *, const char *); extern void setup_pager(void); extern int pager_in_use(void); extern int pager_use_color; +int pager_get_columns(void); char *alias_lookup(const char *alias); int split_cmdline(char *cmdline, const char ***argv); diff --git a/tools/perf/util/pager.c b/tools/perf/util/pager.c index 53ef006..1770c88 100644 --- a/tools/perf/util/pager.c +++ b/tools/perf/util/pager.c @@ -1,6 +1,7 @@ #include cache.h #include run-command.h #include sigchain.h +#include sys/ioctl.h /* * This is split up from the rest of git so that we can do @@ -8,6 +9,7 @@ */ static int spawned_pager; +static int pager_columns; static void pager_preexec(void) { @@ -47,9 +49,12 @@ static void wait_for_pager_signal(int signo) void setup_pager(void) { const char *pager = getenv(PERF_PAGER); + struct winsize sz; if (!isatty(1)) return; + if (ioctl(1, TIOCGWINSZ, sz) == 0) + pager_columns = sz.ws_col; if (!pager) pager = getenv(PAGER); if (!(pager || access(/usr/bin/pager, X_OK))) @@ -93,3 +98,13 @@ int pager_in_use(void) env = getenv(PERF_PAGER_IN_USE); return env ? 
perf_config_bool(PERF_PAGER_IN_USE, env) : 0; } + +int pager_get_columns(void) +{ + char *s; + + s = getenv(COLUMNS); + if (s) + return atoi(s); + return (pager_columns ? pager_columns : 80) - 2; +} diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index cb0396e..0f10fa3 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -14,6 +14,7 @@ #include cpumap.h #include header.h #include pmu-events/pmu-events.h +#include cache.h struct perf_pmu_format { char *name; @@ -1079,7 +1080,7 @@ void print_pmu_events(const char *event_glob, bool name_only) int len, j; struct pair *aliases; int numdesc = 0; - int columns = 78; + int columns = pager_get_columns(); pmu = NULL; len = 0; -- 1.7.9.5 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v16 08/16] perf, tools: Add a --no-desc flag to perf list
From: Andi Kleen a...@linux.intel.com Add a --no-desc flag to perf list to not print the event descriptions that were earlier added for JSON events. This may be useful to get a less crowded listing. It's still default to print descriptions as that is the more useful default for most users. Signed-off-by: Andi Kleen a...@linux.intel.com Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com Acked-by: Jiri Olsa jo...@redhat.com --- v2: Rename --quiet to --no-desc. Add option to man page. --- tools/perf/Documentation/perf-list.txt |8 +++- tools/perf/builtin-list.c | 12 tools/perf/util/parse-events.c |4 ++-- tools/perf/util/parse-events.h |2 +- tools/perf/util/pmu.c |4 ++-- tools/perf/util/pmu.h |2 +- 6 files changed, 21 insertions(+), 11 deletions(-) diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index bada893..9507552 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -8,13 +8,19 @@ perf-list - List all symbolic event types SYNOPSIS [verse] -'perf list' [hw|sw|cache|tracepoint|pmu|event_glob] +'perf list' [--no-desc] [hw|sw|cache|tracepoint|pmu|event_glob] DESCRIPTION --- This command displays the symbolic event types which can be selected in the various perf commands with the -e option. +OPTIONS +--- +--no-desc:: +Don't print descriptions. + + [[EVENT_MODIFIERS]] EVENT MODIFIERS --- diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index af5bd05..3f058f7 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -16,16 +16,20 @@ #include util/pmu.h #include util/parse-options.h +static bool desc_flag = true; + int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) { int i; bool raw_dump = false; struct option list_options[] = { OPT_BOOLEAN(0, raw-dump, raw_dump, Dump raw events), + OPT_BOOLEAN('d', desc, desc_flag, + Print extra event descriptions. 
--no-desc to not print.), OPT_END() }; const char * const list_usage[] = { - perf list [hw|sw|cache|tracepoint|pmu|event_glob], + perf list [--no-desc] [hw|sw|cache|tracepoint|pmu|event_glob], NULL }; @@ -40,7 +44,7 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) printf(\nList of pre-defined events (to be used in -e):\n\n); if (argc == 0) { - print_events(NULL, raw_dump); + print_events(NULL, raw_dump, !desc_flag); return 0; } @@ -59,13 +63,13 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) strcmp(argv[i], hwcache) == 0) print_hwcache_events(NULL, raw_dump); else if (strcmp(argv[i], pmu) == 0) - print_pmu_events(NULL, raw_dump); + print_pmu_events(NULL, raw_dump, !desc_flag); else { char *sep = strchr(argv[i], ':'), *s; int sep_idx; if (sep == NULL) { - print_events(argv[i], raw_dump); + print_events(argv[i], raw_dump, !desc_flag); continue; } sep_idx = sep - argv[i]; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index d826e6f..5a4aed7 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1668,7 +1668,7 @@ out_enomem: /* * Print the help text for the event symbols: */ -void print_events(const char *event_glob, bool name_only) +void print_events(const char *event_glob, bool name_only, bool quiet_flag) { print_symbol_events(event_glob, PERF_TYPE_HARDWARE, event_symbols_hw, PERF_COUNT_HW_MAX, name_only); @@ -1678,7 +1678,7 @@ void print_events(const char *event_glob, bool name_only) print_hwcache_events(event_glob, name_only); - print_pmu_events(event_glob, name_only); + print_pmu_events(event_glob, name_only, quiet_flag); if (event_glob != NULL) return; diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index a09b0e2..c0ee03b 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -138,7 +138,7 @@ void parse_events_update_lists(struct list_head *list_event, void 
parse_events_evlist_error(struct parse_events_evlist *data, int idx, const char *str); -void print_events(const char *event_glob, bool name_only); +void print_events(const char *event_glob, bool name_only, bool
[PATCH v16 09/16] perf, tools: Add override support for event list CPUID
From: Andi Kleen <a...@linux.intel.com> Add a PERF_CPUID variable to override the CPUID of the current CPU (within the current architecture). This is useful for testing, so that all event lists can be tested on a single system. Signed-off-by: Andi Kleen <a...@linux.intel.com> Signed-off-by: Sukadev Bhattiprolu <suka...@linux.vnet.ibm.com> Acked-by: Jiri Olsa <jo...@redhat.com> --- v2: Fix double free in earlier version. Print actual CPUID being used with verbose option. --- tools/perf/util/pmu.c |8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index e4cb21e..ca01aea 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -477,10 +477,16 @@ static int pmu_add_cpu_aliases(struct list_head *head) struct pmu_event *pe; char *cpuid; - cpuid = get_cpuid_str(); + cpuid = getenv("PERF_CPUID"); + if (cpuid) + cpuid = strdup(cpuid); + if (!cpuid) + cpuid = get_cpuid_str(); if (!cpuid) return 0; + pr_debug("Using CPUID %s\n", cpuid); + i = 0; while (1) { map = pmu_events_map[i++]; -- 1.7.9.5 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v16 00/16] perf, tools: Add support for PMU events in JSON format
CPUs support a large number of performance monitoring events (PMU events) and often these events are very specific to an architecture/model of the CPU. To use most of these PMU events with perf, we currently have to identify them by their raw codes: perf stat -e r100f2 sleep 1 This patchset allows architectures to specify these PMU events in JSON files located in 'tools/perf/pmu-events/arch/' of the mainline tree. The events from the JSON files for the architecture are then built into the perf binary. At run time, perf identifies the specific set of events for the CPU and creates event aliases. These aliases allow users to specify events by name as: perf stat -e pm_1plus_ppc_cmpl sleep 1 The file, 'tools/perf/pmu-events/README' in [PATCH 16/16] gives more details. Note: - All known events tables for the architecture are included in the perf binary. - For architectures that don't have any JSON files, an empty mapping table is created and they should continue to build. Thanks to input from Andi Kleen, Jiri Olsa, Namhyung Kim and Ingo Molnar. These patches are available from: https://github.com:sukadev/linux.git Branch Description -- json-v16Source Code only json-files-6x86 and Powerpc datafiles only json-v16-with-data Both code and data (for build/test) NOTE: Only source code patches (i.e those in json-v16) are being emailed. Please pull the data files from the json-files-6 branch. Changelog[v16] Rebase to recent perf/core; fix minor merge conflicts; drop 3 patches that were merged into perf/core. Changelog[v15] Code changes: - Fix 'perf list' usage string and update man page. - Remove a redundant __maybe_unused tag. - Rebase to recent perf/core branch. 
Data files updates: json-files-5 branch - Rebase to perf/intel-json-files-5 from Andi Kleen - Add patch from Madhavan Srinivasan for couple more Powerpc models Changelog[v14] Comments from Jiri Olsa: - Change parameter name/type for pmu_add_cpu_aliases (from void *data to list_head *head) - Use asprintf() in file_name_to_tablename() and simplify/reorg code. - Use __weak definition from linux/compile.h - Use fopen() with mode w and eliminate unlink() - Remove minor TODO. - Add error check for return value from strdup() in print_pmu_events(). - Move independent changes from patches 3,11,12 .. to separate patches for easier review/backport. - Clarify mapfile's header line support in patch description. - Fix build failure with DEBUG=1 Comment from Andi Kleen: - In tools/perf/pmu-events/Build, check for 'mapfile.csv' rather than 'mapfile*' Misc: - Minor changes/clarifications to tools/perf/pmu-events/README. Changelog[v13] Version: Individual patches have their own history :-) that I am preserving. Patchset version (v13) is for overall patchset and is somewhat arbitrary. 
- Added support for categories of events to perf - Add mapfile, jevents build dependency on pmu-events.c - Silence jevents when parsing JSON files unless V=1 is specified - Cleanup error messages - Fix memory leak with -cpuid - Rebase to Arnaldo's tree - Allow overriding CPUID via environment variable - Support long descriptions for events - Handle header line in mapfile.csv - Cleanup JSON files (trim PublicDescription if identical to/prefix of BriefDescription field) Andi Kleen (9): perf, tools: Add jsmn `jasmine' JSON parser perf, tools, jevents: Program to convert JSON file to C style file perf, tools: Support CPU id matching for x86 v2 perf, tools: Support alias descriptions perf, tools: Query terminal width and use in perf list perf, tools: Add a --no-desc flag to perf list perf, tools: Add override support for event list CPUID perf, tools: Add support for event list topics perf, tools: Handle header line in mapfile Sukadev Bhattiprolu (7): perf, tools: Use pmu_events table to create aliases perf, tools: Support CPU ID matching for Powerpc perf, tools, jevents: Add support for long descriptions perf, tools: Add alias support for long descriptions perf, tools: Support long descriptions with perf list perf, tools, jevents: Add support for event topics perf, tools: Add README for info on parsing JSON/map files tools/perf/Documentation/perf-list.txt | 12 +- tools/perf/Makefile.perf | 26 +- tools/perf/arch/powerpc/util/header.c | 11 + tools/perf/arch/x86/util/header.c | 24 +- tools/perf/builtin-list.c | 17 +- tools/perf/pmu-events/Build
[PATCH v16 13/16] perf, tools, jevents: Add support for event topics
Allow assigning categories Topics field to the PMU events i.e. process the topic field from the JSON file and add a corresponding topic field to the generated C events tables. Signed-off-by: Andi Kleen a...@linux.intel.com Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com Acked-by: Jiri Olsa jo...@redhat.com --- Changelog[v14] [Jiri Olsa] Move this independent code off into a separate patch. --- tools/perf/pmu-events/jevents.c| 12 +--- tools/perf/pmu-events/jevents.h|2 +- tools/perf/pmu-events/pmu-events.h |1 + 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index a8507c9..ea3474b 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -203,7 +203,7 @@ static void print_events_table_prefix(FILE *fp, const char *tblname) } static int print_events_table_entry(void *data, char *name, char *event, - char *desc, char *long_desc) + char *desc, char *long_desc, char *topic) { FILE *outfp = data; /* @@ -217,6 +217,8 @@ static int print_events_table_entry(void *data, char *name, char *event, fprintf(outfp, \t.desc = \%s\,\n, desc); if (long_desc long_desc[0]) fprintf(outfp, \t.long_desc = \%s\,\n, long_desc); + if (topic) + fprintf(outfp, \t.topic = \%s\,\n, topic); fprintf(outfp, },\n); @@ -238,7 +240,7 @@ static void print_events_table_suffix(FILE *outfp) /* Call func with each event in the json file */ int json_events(const char *fn, int (*func)(void *data, char *name, char *event, char *desc, - char *long_desc), + char *long_desc, char *topic), void *data) { int err = -EIO; @@ -259,6 +261,7 @@ int json_events(const char *fn, char *event = NULL, *desc = NULL, *name = NULL; char *long_desc = NULL; char *extra_desc = NULL; + char *topic = NULL; struct msrmap *msr = NULL; jsmntok_t *msrval = NULL; jsmntok_t *precise = NULL; @@ -297,6 +300,8 @@ int json_events(const char *fn, !json_streq(map, val, null)) { addfield(map, extra_desc, . 
, Spec update: , val); + } else if (json_streq(map, field, Topic)) { + addfield(map, topic, , , val); } else if (json_streq(map, field, Data_LA) nz) { addfield(map, extra_desc, . , Supports address when precise, @@ -320,12 +325,13 @@ int json_events(const char *fn, addfield(map, event, ,, msr-pname, msrval); fixname(name); - err = func(data, name, event, desc, long_desc); + err = func(data, name, event, desc, long_desc, topic); free(event); free(desc); free(name); free(long_desc); free(extra_desc); + free(topic); if (err) break; tok += j; diff --git a/tools/perf/pmu-events/jevents.h b/tools/perf/pmu-events/jevents.h index b0eb274..9ffcb89 100644 --- a/tools/perf/pmu-events/jevents.h +++ b/tools/perf/pmu-events/jevents.h @@ -3,7 +3,7 @@ int json_events(const char *fn, int (*func)(void *data, char *name, char *event, char *desc, - char *long_desc), + char *long_desc, char *topic), void *data); char *get_cpu_str(void); diff --git a/tools/perf/pmu-events/pmu-events.h b/tools/perf/pmu-events/pmu-events.h index 711f049..6b69f4b 100644 --- a/tools/perf/pmu-events/pmu-events.h +++ b/tools/perf/pmu-events/pmu-events.h @@ -9,6 +9,7 @@ struct pmu_event { const char *event; const char *desc; const char *long_desc; + const char *topic; }; /* -- 1.7.9.5 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v16 10/16] perf, tools, jevents: Add support for long descriptions
Implement support in jevents to parse long descriptions for events that may have them in the JSON files. A follow on patch will make this long description available to user through the 'perf list' command. Signed-off-by: Andi Kleen a...@linux.intel.com Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com Acked-by: Jiri Olsa jo...@redhat.com --- Changelog[v14] - [Jiri Olsa] Break up independent parts of the patch into separate patches. --- tools/perf/pmu-events/jevents.c| 31 +++ tools/perf/pmu-events/jevents.h|3 ++- tools/perf/pmu-events/pmu-events.h |1 + 3 files changed, 26 insertions(+), 9 deletions(-) diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index 5f7603b..a8507c9 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -203,7 +203,7 @@ static void print_events_table_prefix(FILE *fp, const char *tblname) } static int print_events_table_entry(void *data, char *name, char *event, - char *desc) + char *desc, char *long_desc) { FILE *outfp = data; /* @@ -215,6 +215,8 @@ static int print_events_table_entry(void *data, char *name, char *event, fprintf(outfp, \t.name = \%s\,\n, name); fprintf(outfp, \t.event = \%s\,\n, event); fprintf(outfp, \t.desc = \%s\,\n, desc); + if (long_desc long_desc[0]) + fprintf(outfp, \t.long_desc = \%s\,\n, long_desc); fprintf(outfp, },\n); @@ -235,7 +237,8 @@ static void print_events_table_suffix(FILE *outfp) /* Call func with each event in the json file */ int json_events(const char *fn, - int (*func)(void *data, char *name, char *event, char *desc), + int (*func)(void *data, char *name, char *event, char *desc, + char *long_desc), void *data) { int err = -EIO; @@ -254,6 +257,8 @@ int json_events(const char *fn, tok = tokens + 1; for (i = 0; i tokens-size; i++) { char *event = NULL, *desc = NULL, *name = NULL; + char *long_desc = NULL; + char *extra_desc = NULL; struct msrmap *msr = NULL; jsmntok_t *msrval = NULL; jsmntok_t *precise = NULL; @@ -279,6 +284,9 @@ int 
json_events(const char *fn, } else if (json_streq(map, field, BriefDescription)) { addfield(map, desc, , , val); fixdesc(desc); + } else if (json_streq(map, field, PublicDescription)) { + addfield(map, long_desc, , , val); + fixdesc(long_desc); } else if (json_streq(map, field, PEBS) nz) { precise = val; } else if (json_streq(map, field, MSRIndex) nz) { @@ -287,10 +295,10 @@ int json_events(const char *fn, msrval = val; } else if (json_streq(map, field, Errata) !json_streq(map, val, null)) { - addfield(map, desc, . , + addfield(map, extra_desc, . , Spec update: , val); } else if (json_streq(map, field, Data_LA) nz) { - addfield(map, desc, . , + addfield(map, extra_desc, . , Supports address when precise, NULL); } @@ -298,19 +306,26 @@ int json_events(const char *fn, } if (precise !strstr(desc, (Precise Event))) { if (json_streq(map, precise, 2)) - addfield(map, desc, , (Must be precise), - NULL); + addfield(map, extra_desc, , + (Must be precise), NULL); else - addfield(map, desc, , + addfield(map, extra_desc, , (Precise event), NULL); } + if (desc extra_desc) + addfield(map, desc, , extra_desc, NULL); + if (long_desc extra_desc) + addfield(map, long_desc, , extra_desc, NULL); if (msr != NULL) addfield(map, event, ,, msr-pname, msrval); fixname(name); - err = func(data, name, event, desc); + + err = func(data, name, event, desc, long_desc); free(event); free(desc); free(name
[PATCH v16 03/16] perf, tools: Use pmu_events table to create aliases
At run time (when 'perf' is starting up), locate the specific table of PMU events that corresponds to the current CPU. Using that table, create aliases for the each of the PMU events in the CPU. The use these aliases to parse the user specified perf event. In short this would allow the user to specify events using their aliases rather than raw event codes. Based on input and some earlier patches from Andi Kleen, Jiri Olsa. Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com Acked-by: Jiri Olsa jo...@redhat.com --- Changelog[v4] - Split off unrelated code into separate patches. Changelog[v3] - [Jiri Olsa] Fix memory leak in cpuid Changelog[v2] - [Andi Kleen] Replace pmu_events_map-vfm with a generic cpuid. --- tools/perf/util/header.h |1 + tools/perf/util/pmu.c| 61 ++ 2 files changed, 62 insertions(+) diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 9b53b65..10f4a24 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -158,4 +158,5 @@ int write_padded(int fd, const void *bf, size_t count, size_t count_aligned); */ int get_cpuid(char *buffer, size_t sz); +char *get_cpuid_str(void); #endif /* __PERF_HEADER_H */ diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 89c91a1..b8f7627 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -12,6 +12,8 @@ #include pmu.h #include parse-events.h #include cpumap.h +#include header.h +#include pmu-events/pmu-events.h struct perf_pmu_format { char *name; @@ -449,6 +451,62 @@ static struct cpu_map *pmu_cpumask(const char *name) return cpus; } +/* + * Return the CPU id as a raw string. + * + * Each architecture should provide a more precise id string that + * can be use to match the architecture's mapfile. + */ +char * __weak get_cpuid_str(void) +{ + return NULL; +} + +/* + * From the pmu_events_map, find the table of PMU events that corresponds + * to the current running CPU. Then, add all PMU events from that table + * as aliases. 
+ */ +static int pmu_add_cpu_aliases(struct list_head *head) +{ + int i; + struct pmu_events_map *map; + struct pmu_event *pe; + char *cpuid; + + cpuid = get_cpuid_str(); + if (!cpuid) + return 0; + + i = 0; + while (1) { + map = pmu_events_map[i++]; + if (!map-table) + goto out; + + if (!strcmp(map-cpuid, cpuid)) + break; + } + + /* +* Found a matching PMU events table. Create aliases +*/ + i = 0; + while (1) { + pe = map-table[i++]; + if (!pe-name) + break; + + /* need type casts to override 'const' */ + __perf_pmu__new_alias(head, NULL, (char *)pe-name, + (char *)pe-desc, (char *)pe-event); + } + +out: + free(cpuid); + return 0; +} + struct perf_event_attr * __weak perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused) { @@ -473,6 +531,9 @@ static struct perf_pmu *pmu_lookup(const char *name) if (pmu_aliases(name, aliases)) return NULL; + if (!strcmp(name, cpu)) + (void)pmu_add_cpu_aliases(aliases); + if (pmu_type(name, type)) return NULL; -- 1.7.9.5 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v16 11/16] perf, tools: Add alias support for long descriptions
Previously we were dropping the useful longer descriptions that some events have in the event list completely. Now that jevents provides support for longer descriptions (see previous patch), add support for parsing the long descriptions Signed-off-by: Andi Kleen a...@linux.intel.com Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com Acked-by: Jiri Olsa jo...@redhat.com --- Changelog[v14] - [Jiri Olsa] Break up independent parts of the patch into separate patches. --- tools/perf/util/parse-events.c |5 +++-- tools/perf/util/parse-events.h |3 ++- tools/perf/util/pmu.c | 15 ++- tools/perf/util/pmu.h |4 +++- 4 files changed, 18 insertions(+), 9 deletions(-) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 5a4aed7..e14ceb6 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1668,7 +1668,8 @@ out_enomem: /* * Print the help text for the event symbols: */ -void print_events(const char *event_glob, bool name_only, bool quiet_flag) +void print_events(const char *event_glob, bool name_only, bool quiet_flag, + bool long_desc) { print_symbol_events(event_glob, PERF_TYPE_HARDWARE, event_symbols_hw, PERF_COUNT_HW_MAX, name_only); @@ -1678,7 +1679,7 @@ void print_events(const char *event_glob, bool name_only, bool quiet_flag) print_hwcache_events(event_glob, name_only); - print_pmu_events(event_glob, name_only, quiet_flag); + print_pmu_events(event_glob, name_only, quiet_flag, long_desc); if (event_glob != NULL) return; diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index c0ee03b..e468931 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -138,7 +138,8 @@ void parse_events_update_lists(struct list_head *list_event, void parse_events_evlist_error(struct parse_events_evlist *data, int idx, const char *str); -void print_events(const char *event_glob, bool name_only, bool quiet); +void print_events(const char *event_glob, bool name_only, bool 
quiet, + bool long_desc); struct event_symbol { const char *symbol; diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index ca01aea..e608ccc 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -210,7 +210,7 @@ static int perf_pmu__parse_snapshot(struct perf_pmu_alias *alias, } static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name, -char *desc, char *val) +char *desc, char *val, char *long_desc) { struct perf_pmu_alias *alias; int ret; @@ -243,6 +243,8 @@ static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name, } alias-desc = desc ? strdup(desc) : NULL; + alias-long_desc = long_desc ? strdup(long_desc) : + desc ? strdup(desc) : NULL; list_add_tail(alias-list, list); @@ -260,7 +262,7 @@ static int perf_pmu__new_alias(struct list_head *list, char *dir, char *name, FI buf[ret] = 0; - return __perf_pmu__new_alias(list, dir, name, NULL, buf); + return __perf_pmu__new_alias(list, dir, name, NULL, buf, NULL); } static inline bool pmu_alias_info_file(char *name) @@ -508,7 +510,8 @@ static int pmu_add_cpu_aliases(struct list_head *head) /* need type casts to override 'const' */ __perf_pmu__new_alias(head, NULL, (char *)pe-name, - (char *)pe-desc, (char *)pe-event); + (char *)pe-desc, (char *)pe-event, + (char *)pe-long_desc); } out: @@ -1077,7 +1080,8 @@ static void wordwrap(char *s, int start, int max, int corr) } } -void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag) +void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag, + bool long_desc) { struct perf_pmu *pmu; struct perf_pmu_alias *alias; @@ -1124,7 +1128,8 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag) if (!aliases[j].name) goto out_enomem; - aliases[j].desc = alias-desc; + aliases[j].desc = long_desc ? 
alias-long_desc : + alias-desc; j++; } if (pmu-selectable) { diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 42999c7..1aa614e 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -39,6 +39,7 @@ struct perf_pmu_info { struct perf_pmu_alias { char *name; char *desc; + char *long_desc
[PATCH v16 12/16] perf, tools: Support long descriptions with perf list
Previously we were dropping the useful longer descriptions that some events have in the event list completely. This patch makes them appear with perf list. Old perf list: baclears: baclears.all [Counts the number of baclears] vs new: perf list -v: ... baclears: baclears.all [The BACLEARS event counts the number of times the front end is resteered, mainly when the Branch Prediction Unit cannot provide a correct prediction and this is corrected by the Branch Address Calculator at the front end. The BACLEARS.ANY event counts the number of baclears for any type of branch] Signed-off-by: Andi Kleen a...@linux.intel.com Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com Acked-by: Jiri Olsa jo...@redhat.com --- Changelog[v15] - [Jir Olsa, Andi Kleen] Fix usage strings; update man page. Changelog[v14] - [Jiri Olsa] Break up independent parts of the patch into separate patches. --- tools/perf/Documentation/perf-list.txt |6 +- tools/perf/builtin-list.c | 13 + 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index 9507552..48202f2 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -8,7 +8,7 @@ perf-list - List all symbolic event types SYNOPSIS [verse] -'perf list' [--no-desc] [hw|sw|cache|tracepoint|pmu|event_glob] +'perf list' [--no-desc] [--long-desc] [hw|sw|cache|tracepoint|pmu|event_glob] DESCRIPTION --- @@ -20,6 +20,10 @@ OPTIONS --no-desc:: Don't print descriptions. +-v:: +--long-desc:: +Print longer event descriptions. 
+ [[EVENT_MODIFIERS]] EVENT MODIFIERS diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index 3f058f7..f800927 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -22,14 +22,17 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) { int i; bool raw_dump = false; + bool long_desc_flag = false; struct option list_options[] = { OPT_BOOLEAN(0, raw-dump, raw_dump, Dump raw events), OPT_BOOLEAN('d', desc, desc_flag, Print extra event descriptions. --no-desc to not print.), + OPT_BOOLEAN('v', long-desc, long_desc_flag, + Print longer event descriptions.), OPT_END() }; const char * const list_usage[] = { - perf list [--no-desc] [hw|sw|cache|tracepoint|pmu|event_glob], + perf list [options] [hw|sw|cache|tracepoint|pmu|event_glob], NULL }; @@ -44,7 +47,7 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) printf(\nList of pre-defined events (to be used in -e):\n\n); if (argc == 0) { - print_events(NULL, raw_dump, !desc_flag); + print_events(NULL, raw_dump, !desc_flag, long_desc_flag); return 0; } @@ -63,13 +66,15 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) strcmp(argv[i], hwcache) == 0) print_hwcache_events(NULL, raw_dump); else if (strcmp(argv[i], pmu) == 0) - print_pmu_events(NULL, raw_dump, !desc_flag); + print_pmu_events(NULL, raw_dump, !desc_flag, + long_desc_flag); else { char *sep = strchr(argv[i], ':'), *s; int sep_idx; if (sep == NULL) { - print_events(argv[i], raw_dump, !desc_flag); + print_events(argv[i], raw_dump, !desc_flag, + long_desc_flag); continue; } sep_idx = sep - argv[i]; -- 1.7.9.5 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v16 06/16] perf, tools: Support alias descriptions
From: Andi Kleen a...@linux.intel.com Add support to print alias descriptions in perf list, which are taken from the generated event files. The sorting code is changed to put the events with descriptions at the end. The descriptions are printed as possibly multiple word wrapped lines. Example output: % perf list ... arith.fpu_div [Divide operations executed] arith.fpu_div_active [Cycles when divider is busy executing divide operations] Signed-off-by: Andi Kleen a...@linux.intel.com Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com Acked-by: Jiri Olsa jo...@redhat.com --- Changelog - Delete a redundant free() Changelog[v14] - [Jiri Olsa] Fail, rather than continue if strdup() returns NULL; remove unnecessary __maybe_unused. --- tools/perf/util/pmu.c | 82 +++-- tools/perf/util/pmu.h |1 + 2 files changed, 67 insertions(+), 16 deletions(-) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index b8f7627..cb0396e 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -209,7 +209,7 @@ static int perf_pmu__parse_snapshot(struct perf_pmu_alias *alias, } static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name, -char *desc __maybe_unused, char *val) +char *desc, char *val) { struct perf_pmu_alias *alias; int ret; @@ -241,6 +241,8 @@ static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name, perf_pmu__parse_snapshot(alias, dir, name); } + alias-desc = desc ? 
strdup(desc) : NULL; + list_add_tail(alias-list, list); return 0; @@ -1030,11 +1032,42 @@ static char *format_alias_or(char *buf, int len, struct perf_pmu *pmu, return buf; } -static int cmp_string(const void *a, const void *b) +struct pair { + char *name; + char *desc; +}; + +static int cmp_pair(const void *a, const void *b) +{ + const struct pair *as = a; + const struct pair *bs = b; + + /* Put extra events last */ + if (!!as-desc != !!bs-desc) + return !!as-desc - !!bs-desc; + return strcmp(as-name, bs-name); +} + +static void wordwrap(char *s, int start, int max, int corr) { - const char * const *as = a; - const char * const *bs = b; - return strcmp(*as, *bs); + int column = start; + int n; + + while (*s) { + int wlen = strcspn(s, \t); + + if (column + wlen = max column start) { + printf(\n%*s, start, ); + column = start + corr; + } + n = printf(%s%.*s, column start ? : , wlen, s); + if (n = 0) + break; + s += wlen; + column += n; + while (isspace(*s)) + s++; + } } void print_pmu_events(const char *event_glob, bool name_only) @@ -1044,7 +1077,9 @@ void print_pmu_events(const char *event_glob, bool name_only) char buf[1024]; int printed = 0; int len, j; - char **aliases; + struct pair *aliases; + int numdesc = 0; + int columns = 78; pmu = NULL; len = 0; @@ -1054,14 +1089,15 @@ void print_pmu_events(const char *event_glob, bool name_only) if (pmu-selectable) len++; } - aliases = zalloc(sizeof(char *) * len); + aliases = zalloc(sizeof(struct pair) * len); if (!aliases) goto out_enomem; pmu = NULL; j = 0; while ((pmu = perf_pmu__scan(pmu)) != NULL) { list_for_each_entry(alias, pmu-aliases, list) { - char *name = format_alias(buf, sizeof(buf), pmu, alias); + char *name = alias-desc ? 
alias-name : + format_alias(buf, sizeof(buf), pmu, alias); bool is_cpu = !strcmp(pmu-name, cpu); if (event_glob != NULL @@ -1070,37 +1106,51 @@ void print_pmu_events(const char *event_glob, bool name_only) event_glob continue; - if (is_cpu !name_only) + if (is_cpu !name_only !alias-desc) name = format_alias_or(buf, sizeof(buf), pmu, alias); - aliases[j] = strdup(name); - if (aliases[j] == NULL) + aliases[j].name = name; + if (is_cpu !name_only !alias-desc) + aliases[j].name = format_alias_or(buf, sizeof(buf), + pmu, alias); + aliases[j].name = strdup(aliases[j].name
[PATCH v16 16/16] perf, tools: Add README for info on parsing JSON/map files
Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com Acked-by: Jiri Olsa jo...@redhat.com --- tools/perf/pmu-events/README | 122 ++ 1 file changed, 122 insertions(+) create mode 100644 tools/perf/pmu-events/README diff --git a/tools/perf/pmu-events/README b/tools/perf/pmu-events/README new file mode 100644 index 000..da57cb5 --- /dev/null +++ b/tools/perf/pmu-events/README @@ -0,0 +1,122 @@ + +The contents of this directory allow users to specify PMU events in +their CPUs by their symbolic names rather than raw event codes (see +example below). + +The main program in this directory, is the 'jevents', which is built and +executed _before_ the perf binary itself is built. + +The 'jevents' program tries to locate and process JSON files in the directory +tree tools/perf/pmu-events/arch/foo. + + - Regular files with '.json' extension in the name are assumed to be + JSON files, each of which describes a set of PMU events. + + - Regular files with basename starting with 'mapfile.csv' are assumed + to be a CSV file that maps a specific CPU to its set of PMU events. + (see below for mapfile format) + + - Directories are traversed, but all other files are ignored. + +Using the JSON files and the mapfile, 'jevents' generates the C source file, +'pmu-events.c', which encodes the two sets of tables: + + - Set of 'PMU events tables' for all known CPUs in the architecture, + (one table like the following, per JSON file; table name 'pme_power8' + is derived from JSON file name, 'power8.json'). + + struct pmu_event pme_power8[] = { + + ... + + { + .name = pm_1plus_ppc_cmpl, + .event = event=0x100f2, + .desc = 1 or more ppc insts finished,, + }, + + ... + } + + - A 'mapping table' that maps each CPU of the architecture, to its + 'PMU events table' + + struct pmu_events_map pmu_events_map[] = { + { + .cpuid = 004b, + .version = 1, + .type = core, + .table = pme_power8 + }, + ... 
+ + }; + +After the 'pmu-events.c' is generated, it is compiled and the resulting +'pmu-events.o' is added to 'libperf.a' which is then used to build perf. + +NOTES: + 1. Several CPUs can support same set of events and hence use a common + JSON file. Hence several entries in the pmu_events_map[] could map + to a single 'PMU events table'. + + 2. The 'pmu-events.h' has an extern declaration for the mapping table + and the generated 'pmu-events.c' defines this table. + + 3. _All_ known CPU tables for architecture are included in the perf + binary. + +At run time, perf determines the actual CPU it is running on, finds the +matching events table and builds aliases for those events. This allows +users to specify events by their name: + + $ perf stat -e pm_1plus_ppc_cmpl sleep 1 + +where 'pm_1plus_ppc_cmpl' is a Power8 PMU event. + +In case of errors when processing files in the tools/perf/pmu-events/arch +directory, 'jevents' tries to create an empty mapping file to allow the perf +build to succeed even if the PMU event aliases cannot be used. + +However some errors in processing may cause the perf build to fail. + +Mapfile format +=== + +The mapfile.csv format is expected to be: + + Header line + CPUID,Version,File/path/name.json,Type + +where: + + Comma: + is the required field delimiter (i.e other fields cannot + have commas within them). + + Comments: + Lines in which the first character is either '\n' or '#' + are ignored. + + Header line + The header line is the first line in the file, which is + _IGNORED_. It can be a comment (begin with '#') or empty. + + CPUID: + CPUID is an arch-specific char string, that can be used + to identify CPU (and associate it with a set of PMU events + it supports). Multiple CPUIDS can point to the same + File/path/name.json. + + Example: + CPUID == 'GenuineIntel-6-2E' (on x86). + CPUID == '004b0100' (PVR value in Powerpc) + Version: + is the Version of the mapfile. 
+ + File/path/name.json: + is the pathname for the JSON file, relative to the directory + containing the mapfile.csv + + Type: + indicates whether the events are core or uncore events. -- 1.7.9.5 ___ Linuxppc-dev mailing list Linuxppc
[PATCH v16 05/16] perf, tools: Support CPU id matching for x86 v2
From: Andi Kleen a...@linux.intel.com Implement the code to match CPU types to mapfile types for x86 based on CPUID. This extends an existing similar function, but changes it to use the x86 mapfile cpu description. This allows to resolve event lists generated by jevents. Signed-off-by: Andi Kleen a...@linux.intel.com Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com Acked-by: Jiri Olsa jo...@redhat.com --- v2: Update to new get_cpuid_str() interface --- tools/perf/arch/x86/util/header.c | 24 +--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/tools/perf/arch/x86/util/header.c b/tools/perf/arch/x86/util/header.c index 146d12a..a74a48d 100644 --- a/tools/perf/arch/x86/util/header.c +++ b/tools/perf/arch/x86/util/header.c @@ -19,8 +19,8 @@ cpuid(unsigned int op, unsigned int *a, unsigned int *b, unsigned int *c, : a (op)); } -int -get_cpuid(char *buffer, size_t sz) +static int +__get_cpuid(char *buffer, size_t sz, const char *fmt) { unsigned int a, b, c, d, lvl; int family = -1, model = -1, step = -1; @@ -48,7 +48,7 @@ get_cpuid(char *buffer, size_t sz) if (family = 0x6) model += ((a 16) 0xf) 4; } - nb = scnprintf(buffer, sz, %s,%u,%u,%u$, vendor, family, model, step); + nb = scnprintf(buffer, sz, fmt, vendor, family, model, step); /* look for end marker to ensure the entire data fit */ if (strchr(buffer, '$')) { @@ -57,3 +57,21 @@ get_cpuid(char *buffer, size_t sz) } return -1; } + +int +get_cpuid(char *buffer, size_t sz) +{ + return __get_cpuid(buffer, sz, %s,%u,%u,%u$); +} + +char * +get_cpuid_str(void) +{ + char *buf = malloc(128); + + if (__get_cpuid(buf, 128, %s-%u-%X$) 0) { + free(buf); + return NULL; + } + return buf; +} -- 1.7.9.5 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v16 14/16] perf, tools: Add support for event list topics
From: Andi Kleen a...@linux.intel.com Add support to group the output of perf list by the Topic field in the JSON file. Example output: % perf list ... Cache: l1d.replacement [L1D data line replacements] l1d_pend_miss.pending [L1D miss oustandings duration in cycles] l1d_pend_miss.pending_cycles [Cycles with L1D load Misses outstanding] l2_l1d_wb_rqsts.all [Not rejected writebacks from L1D to L2 cache lines in any state] l2_l1d_wb_rqsts.hit_e [Not rejected writebacks from L1D to L2 cache lines in E state] l2_l1d_wb_rqsts.hit_m [Not rejected writebacks from L1D to L2 cache lines in M state] ... Pipeline: arith.fpu_div [Divide operations executed] arith.fpu_div_active [Cycles when divider is busy executing divide operations] baclears.any [Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end] br_inst_exec.all_branches [Speculative and retired branches] br_inst_exec.all_conditional [Speculative and retired macro-conditional branches] br_inst_exec.all_direct_jmp [Speculative and retired macro-unconditional branches excluding calls and indirects] br_inst_exec.all_direct_near_call [Speculative and retired direct near calls] br_inst_exec.all_indirect_jump_non_call_ret Signed-off-by: Andi Kleen a...@linux.intel.com Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com Acked-by: Jiri Olsa jo...@redhat.com --- Changelog[v14] - [Jiri Olsa] Move jevents support for Topic to a separate patch. 
--- tools/perf/util/pmu.c | 37 +++-- tools/perf/util/pmu.h |1 + 2 files changed, 28 insertions(+), 10 deletions(-) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index e608ccc..75a53d3 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -210,7 +210,8 @@ static int perf_pmu__parse_snapshot(struct perf_pmu_alias *alias, } static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name, -char *desc, char *val, char *long_desc) +char *desc, char *val, char *long_desc, +char *topic) { struct perf_pmu_alias *alias; int ret; @@ -245,6 +246,7 @@ static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name, alias-desc = desc ? strdup(desc) : NULL; alias-long_desc = long_desc ? strdup(long_desc) : desc ? strdup(desc) : NULL; + alias-topic = topic ? strdup(topic) : NULL; list_add_tail(alias-list, list); @@ -262,7 +264,7 @@ static int perf_pmu__new_alias(struct list_head *list, char *dir, char *name, FI buf[ret] = 0; - return __perf_pmu__new_alias(list, dir, name, NULL, buf, NULL); + return __perf_pmu__new_alias(list, dir, name, NULL, buf, NULL, NULL); } static inline bool pmu_alias_info_file(char *name) @@ -511,7 +513,7 @@ static int pmu_add_cpu_aliases(struct list_head *head) /* need type casts to override 'const' */ __perf_pmu__new_alias(head, NULL, (char *)pe-name, (char *)pe-desc, (char *)pe-event, - (char *)pe-long_desc); + (char *)pe-long_desc, (char *)pe-topic); } out: @@ -1042,19 +1044,26 @@ static char *format_alias_or(char *buf, int len, struct perf_pmu *pmu, return buf; } -struct pair { +struct sevent { char *name; char *desc; + char *topic; }; -static int cmp_pair(const void *a, const void *b) +static int cmp_sevent(const void *a, const void *b) { - const struct pair *as = a; - const struct pair *bs = b; + const struct sevent *as = a; + const struct sevent *bs = b; /* Put extra events last */ if (!!as-desc != !!bs-desc) return !!as-desc - !!bs-desc; + if (as-topic bs-topic) { + int n = strcmp(as-topic, 
bs-topic); + + if (n) + return n; + } return strcmp(as-name, bs-name); } @@ -1088,9 +1097,10 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag, char buf[1024]; int printed = 0; int len, j; - struct pair *aliases; + struct sevent *aliases; int numdesc = 0; int columns = pager_get_columns(); + char *topic = NULL; pmu = NULL; len = 0; @@ -1100,7 +1110,7 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag, if (pmu-selectable) len++; } - aliases = zalloc(sizeof(struct pair) * len); + aliases = zalloc(sizeof(struct sevent) * len
[PATCH v16 15/16] perf, tools: Handle header line in mapfile
From: Andi Kleen a...@linux.intel.com To work with existing mapfiles, assume that the first line in 'mapfile.csv' is a header line and skip over it. Signed-off-by: Andi Kleen a...@linux.intel.com Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com Acked-by: Jiri Olsa jo...@redhat.com --- Changelog[v2] All architectures may not use the Family to identify. So, assume first line is header. --- tools/perf/pmu-events/jevents.c |9 +++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index ea3474b..7347cca 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -462,7 +462,12 @@ static int process_mapfile(FILE *outfp, char *fpath) print_mapping_table_prefix(outfp); - line_num = 0; + /* Skip first line (header) */ + p = fgets(line, n, mapfp); + if (!p) + goto out; + + line_num = 1; while (1) { char *cpuid, *version, *type, *fname; @@ -506,8 +511,8 @@ static int process_mapfile(FILE *outfp, char *fpath) fprintf(outfp, },\n); } +out: print_mapping_table_suffix(outfp); - return 0; } -- 1.7.9.5 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v16 02/16] perf, tools, jevents: Program to convert JSON file to C style file
From: Andi Kleen a...@linux.intel.com This is a modified version of an earlier patch by Andi Kleen. We expect architectures to describe the performance monitoring events for each CPU in a corresponding JSON file, which look like: [ { EventCode: 0x00, UMask: 0x01, EventName: INST_RETIRED.ANY, BriefDescription: Instructions retired from execution., PublicDescription: Instructions retired from execution., Counter: Fixed counter 1, CounterHTOff: Fixed counter 1, SampleAfterValue: 203, SampleAfterValue: 203, MSRIndex: 0, MSRValue: 0, TakenAlone: 0, CounterMask: 0, Invert: 0, AnyThread: 0, EdgeDetect: 0, PEBS: 0, PRECISE_STORE: 0, Errata: null, Offcore: 0 } ] We also expect the architectures to provide a mapping between individual CPUs to their JSON files. Eg: GenuineIntel-6-1E,V1,/NHM-EP/NehalemEP_core_V1.json,core which maps each CPU, identified by [vendor, family, model, version, type] to a JSON file. Given these files, the program, jevents:: - locates all JSON files for the architecture, - parses each JSON file and generates a C-style PMU-events table (pmu-events.c) - locates a mapfile for the architecture - builds a global table, mapping each model of CPU to the corresponding PMU-events table. The 'pmu-events.c' is generated when building perf and added to libperf.a. The global table pmu_events_map[] table in this pmu-events.c will be used in perf in a follow-on patch. If the architecture does not have any JSON files or there is an error in processing them, an empty mapping file is created. This would allow the build of perf to proceed even if we are not able to provide aliases for events. The parser for JSON files allows parsing Intel style JSON event files. This allows to use an Intel event list directly with perf. The Intel event lists can be quite large and are too big to store in unswappable kernel memory. The conversion from JSON to C-style is straight forward. 
The parser knows (very little) Intel specific information, and can be easily extended to handle fields for other CPUs. The parser code is partially shared with an independent parsing library, which is 2-clause BSD licenced. To avoid any conflicts I marked those files as BSD licenced too. As part of perf they become GPLv2. Signed-off-by: Andi Kleen a...@linux.intel.com Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com Acked-by: Jiri Olsa jo...@redhat.com --- v2: Address review feedback. Rename option to --event-files v3: Add JSON example v4: Update manpages. v5: Don't remove dot in fixname. Fix compile error. Add include protection. Comment realloc. v6: Include debug/util.h v7: (Sukadev Bhattiprolu) Rebase to 4.0 and fix some conflicts. v8: (Sukadev Bhattiprolu) Move jevents.[hc] to tools/perf/pmu-events/ Rewrite to locate and process arch specific JSON and map files; and generate a C file. (Removed acked-by Namhyung Kim due to modest changes to patch) Compile the generated pmu-events.c and add the pmu-events.o to libperf.a v9: [Sukadev Bhattiprolu/Andi Kleen] Rename -vfm to -cpuid and use that field to encode the PVR in Power. Allow blank lines in mapfile. [Jiri Olsa] Pass ARCH as a parameter to jevents so we don't have to detect it. [Jiri Olsa] Use the infrastrastructure to build pmu-events/perf (Makefile changes from Jiri included in this patch). [Jiri Olsa, Andi Kleen] Detect changes to JSON files and rebuild pmu-events.o only if necessary. v11:- [Andi Kleen] Add mapfile, jevents dependency on pmu-events.c - [Jiri Olsa] Be silient if arch doesn't have JSON files - Also silence 'jevents' when parsing JSON files unless V=1 is specified during build. Cleanup error messages. v14:- - [Jiri Olsa] Fix compile error with DEBUG=1; drop unlink() and use w mode with fopen(); simplify file_name_to_table_name() v15:- Fix minor conflict in tools/perf/Makefile.perf when rebasing to recent perf/core. 
v16:- Rebase to upstream; fix conflicts in tools/perf/Makefile.perf --- tools/perf/Makefile.perf | 26 +- tools/perf/pmu-events/Build| 11 + tools/perf/pmu-events/jevents.c| 686 tools/perf/pmu-events/jevents.h| 17 + tools/perf/pmu-events/json.h |3 + tools/perf/pmu-events/pmu-events.h | 35 ++ 6 files changed, 774 insertions(+), 4 deletions(-) create mode 100644 tools/perf/pmu-events/Build create mode 100644 tools/perf/pmu-events/jevents.c create mode 100644 tools/perf/pmu-events/jevents.h create mode 100644 tools/perf/pmu-events/pmu-events.h diff --git a/tools/perf/Makefile.perf b/tools/perf
[PATCH v16 01/16] perf, tools: Add jsmn `jasmine' JSON parser
From: Andi Kleen a...@linux.intel.com I need a JSON parser. This adds the simplest JSON parser I could find -- Serge Zaitsev's jsmn `jasmine' -- to the perf library. I merely converted it to (mostly) Linux style and added support for non 0 terminated input. The parser is quite straight forward and does not copy any data, just returns tokens with offsets into the input buffer. So it's relatively efficient and simple to use. The code is not fully checkpatch clean, but I didn't want to completely fork the upstream code. Original source: http://zserge.bitbucket.org/jsmn.html In addition I added a simple wrapper that mmaps a json file and provides some straight forward access functions. Used in follow-on patches to parse event files. Acked-by: Namhyung Kim namhy...@kernel.org Acked-by: Jiri Olsa jo...@redhat.com Signed-off-by: Andi Kleen a...@linux.intel.com Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com --- v2: Address review feedback. v3: Minor checkpatch fixes. v4 (by Sukadev Bhattiprolu) - Rebase to 4.0 and fix minor conflicts in tools/perf/Makefile.perf - Report error if specified events file is invalid. v5 (Sukadev Bhattiprolu) - Move files to tools/perf/pmu-events/ since parsing of JSON file now occurs when _building_ rather than running perf. --- tools/perf/pmu-events/jsmn.c | 313 ++ tools/perf/pmu-events/jsmn.h | 67 + tools/perf/pmu-events/json.c | 162 ++ tools/perf/pmu-events/json.h | 36 + 4 files changed, 578 insertions(+) create mode 100644 tools/perf/pmu-events/jsmn.c create mode 100644 tools/perf/pmu-events/jsmn.h create mode 100644 tools/perf/pmu-events/json.c create mode 100644 tools/perf/pmu-events/json.h diff --git a/tools/perf/pmu-events/jsmn.c b/tools/perf/pmu-events/jsmn.c new file mode 100644 index 000..11d1fa1 --- /dev/null +++ b/tools/perf/pmu-events/jsmn.c @@ -0,0 +1,313 @@ +/* + * Copyright (c) 2010 Serge A. 
Zaitsev + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the Software), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * Slightly modified by AK to not assume 0 terminated input. + */ + +#include stdlib.h +#include jsmn.h + +/* + * Allocates a fresh unused token from the token pool. + */ +static jsmntok_t *jsmn_alloc_token(jsmn_parser *parser, + jsmntok_t *tokens, size_t num_tokens) +{ + jsmntok_t *tok; + + if ((unsigned)parser-toknext = num_tokens) + return NULL; + tok = tokens[parser-toknext++]; + tok-start = tok-end = -1; + tok-size = 0; + return tok; +} + +/* + * Fills token type and boundaries. + */ +static void jsmn_fill_token(jsmntok_t *token, jsmntype_t type, + int start, int end) +{ + token-type = type; + token-start = start; + token-end = end; + token-size = 0; +} + +/* + * Fills next available token with JSON primitive. 
+ */ +static jsmnerr_t jsmn_parse_primitive(jsmn_parser *parser, const char *js, + size_t len, + jsmntok_t *tokens, size_t num_tokens) +{ + jsmntok_t *token; + int start; + + start = parser-pos; + + for (; parser-pos len; parser-pos++) { + switch (js[parser-pos]) { +#ifndef JSMN_STRICT + /* +* In strict mode primitive must be followed by , +* or } or ] +*/ + case ':': +#endif + case '\t': + case '\r': + case '\n': + case ' ': + case ',': + case ']': + case '}': + goto found; + default: + break; + } + if (js[parser-pos] 32 || js[parser-pos] = 127) { + parser-pos = start; + return JSMN_ERROR_INVAL
[PATCH v5 1/8] perf: Add a flags parameter to pmu txn interfaces
Currently, the PMU interface allows reading only one counter at a time. But some PMUs like the 24x7 counters in Power, support reading several counters at once. To leverage this functionality, extend the transaction interface to support a transaction type. The first type, PERF_PMU_TXN_ADD, refers to the existing transactions, i.e. used to _schedule_ all the events on the PMU as a group. A second transaction type, PERF_PMU_TXN_READ, will be used in a follow-on patch, by the 24x7 counters to read several counters at once. Extend the transaction interfaces to the PMU to accept a 'txn_flags' parameter and use this parameter to ignore any transactions that are not of type PERF_PMU_TXN_ADD. Thanks to Peter Zijlstra for his input. Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com --- Changelog[v4] - [Peter Zijlstra] Fix a copy-paste error in power_pmu_cancel_txn(). - [Peter Zijlstra] Use __this_cpu_read() and __this_cpu_write(). Changelog[v3] - [Peter Zijlstra] Ensure the nop_txn interfaces disable/enable PMU only for TXN_ADD transactions. - [Peter Zijlstra] Cache the flags parameter in -start_txn() and drop the flags parameter from -commit_txn() and -cancel_txn().
--- arch/powerpc/perf/core-book3s.c | 25 - arch/s390/kernel/perf_cpum_cf.c | 24 +++- arch/sparc/kernel/perf_event.c | 19 ++- arch/x86/kernel/cpu/perf_event.c | 27 +-- arch/x86/kernel/cpu/perf_event.h |1 + include/linux/perf_event.h | 14 +++--- kernel/events/core.c | 31 --- 7 files changed, 130 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index d90893b..b18efe4 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -50,6 +50,7 @@ struct cpu_hw_events { unsigned int group_flag; int n_txn_start; + int txn_flags; /* BHRB bits */ u64 bhrb_filter;/* BHRB HW branch filter */ @@ -1586,11 +1587,19 @@ static void power_pmu_stop(struct perf_event *event, int ef_flags) * Start group events scheduling transaction * Set the flag to make pmu::enable() not perform the * schedulability test, it will be performed at commit time + * + * We only support PERF_PMU_TXN_ADD transactions. Save the + * transaction flags but otherwise ignore non-PERF_PMU_TXN_ADD + * transactions. 
*/ -static void power_pmu_start_txn(struct pmu *pmu) +static void power_pmu_start_txn(struct pmu *pmu, int txn_flags) { struct cpu_hw_events *cpuhw = this_cpu_ptr(cpu_hw_events); + cpuhw-txn_flags = txn_flags; + if (txn_flags ~PERF_PMU_TXN_ADD) + return; + perf_pmu_disable(pmu); cpuhw-group_flag |= PERF_EVENT_TXN; cpuhw-n_txn_start = cpuhw-n_events; @@ -1604,6 +1613,12 @@ static void power_pmu_start_txn(struct pmu *pmu) static void power_pmu_cancel_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw = this_cpu_ptr(cpu_hw_events); + int txn_flags; + + txn_flags = cpuhw-txn_flags; + cpuhw-txn_flags = 0; + if (txn_flags ~PERF_PMU_TXN_ADD) + return; cpuhw-group_flag = ~PERF_EVENT_TXN; perf_pmu_enable(pmu); @@ -1618,10 +1633,18 @@ static int power_pmu_commit_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw; long i, n; + int txn_flags; if (!ppmu) return -EAGAIN; + cpuhw = this_cpu_ptr(cpu_hw_events); + + txn_flags = cpuhw-txn_flags; + cpuhw-txn_flags = 0; + if (cpuhw-txn_flags ~PERF_PMU_TXN_ADD) + return 0; + n = cpuhw-n_events; if (check_excludes(cpuhw-event, cpuhw-flags, 0, n)) return -EAGAIN; diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 56fdad4..a6f9e7b 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -72,6 +72,7 @@ struct cpu_hw_events { atomic_tctr_set[CPUMF_CTR_SET_MAX]; u64 state, tx_state; unsigned intflags; + int txn_flags; }; static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .ctr_set = { @@ -82,6 +83,7 @@ static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { }, .state = 0, .flags = 0, + .txn_flags = 0, }; static int get_counter_set(u64 event) @@ -572,11 +574,19 @@ static void cpumf_pmu_del(struct perf_event *event, int flags) /* * Start group events scheduling transaction. * Set flags to perform a single test at commit time. + * + * We only support PERF_PMU_TXN_ADD transactions. 
Save the + * transaction flags but otherwise ignore non-PERF_PMU_TXN_ADD + * transactions. */ -static void
[PATCH v5 0/8] perf: Implement group-read of events using txn interface
Unlike normal hardware PMCs, the 24x7 counters in Power8 are stored in memory and accessed via a hypervisor call (HCALL). A major aspect of the HCALL is that it allows retrieving _several_ counters at once (unlike regular PMCs, which are read one at a time). By reading several counters at once, we can get a more consistent snapshot of the system. This patchset extends the transaction interface to accomplish submitting several events to the PMU and have the PMU read them all at once. User is expected to submit the set of events they want to read as an event group. In the kernel, we submit each event to the PMU using the following logic (from Peter Zijlstra). pmu-start_txn(pmu, PMU_TXN_READ); leader-read(); for_each_sibling() sibling-read(); pmu-commit_txn(); where: - the -read()s queue events to be submitted to the hypervisor, and, - the -commit_txn() issues the HCALL, retrieves the result and updates the event count. Architectures/PMUs that don't need/implement PMU_TXN_READ type of transactions, simply ignore the -start_txn() and -commit_txn() and continue to read the counters one at a time. Compile/touch tested on x86. Need help testing on s390 and Sparc. Thanks to Peter Zijlstra for his input/code. Changelog[v5] - Invert the sibling-child loop nesting in perf-read-group (re-org code and drop the patch that defined perf_event_aggregate()). Changelog[v4] - Ensure all the transaction operations happen on the same CPU so PMUs can use per-CPU buffers for the transaction. - Add lockdep assert and fix a locking issue in perf_read_group(). Changelog [v3] - Simple changes/reorg of patchset to split/rename functions - [Peter Zijlstra] Save the transaction flags in -start_txn() and drop the flags parameter from -commit_txn() and -cancel_txn(). - [Peter Zijlstra] The nop txn interfaces don't need to disable/enable PMU for PERF_PMU_TXN_READ transactions. Changelog [v2] - Use the transaction interface unconditionally to avoid special-case code.
Architectures/PMUs that don't need the READ transaction types simply ignore the -start_txn() and -commit_txn() calls. Peter Zijlstra (2): perf: Add group reads to perf_event_read() perf: Invert perf_read_group() loops Peter Zijlstra (Intel) (1): perf: Rename perf_event_read_{one,group}, perf_read_hw Sukadev Bhattiprolu (5): perf: Add a flags parameter to pmu txn interfaces perf: Split perf_event_read() and perf_event_count() perf: Add return value for perf_event_read(). Define PERF_PMU_TXN_READ interface powerpc/perf/hv-24x7: Use PMU_TXN_READ interface arch/powerpc/perf/core-book3s.c | 25 - arch/powerpc/perf/hv-24x7.c | 166 +- arch/s390/kernel/perf_cpum_cf.c | 24 - arch/sparc/kernel/perf_event.c | 19 +++- arch/x86/kernel/cpu/perf_event.c | 27 - arch/x86/kernel/cpu/perf_event.h |1 + include/linux/perf_event.h | 15 ++- kernel/events/core.c | 210 +- 8 files changed, 429 insertions(+), 58 deletions(-) -- 1.7.9.5 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v5 2/8] perf: Split perf_event_read() and perf_event_count()
perf_event_read() does two things: - call the PMU to read/update the counter value, and - compute the total count of the event and its children Not all callers need both. perf_event_reset() for instance needs the first piece but doesn't need the second. Similarly, when we implement the ability to read a group of events using the transaction interface, we would need the two pieces done independently. Break up perf_event_read() and have it just read/update the counter and have the callers compute the total count if necessary. Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com --- kernel/events/core.c | 14 -- 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 4435bf5..f9ca8cb 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3212,7 +3212,7 @@ static inline u64 perf_event_count(struct perf_event *event) return __perf_event_count(event); } -static u64 perf_event_read(struct perf_event *event) +static void perf_event_read(struct perf_event *event) { /* * If event is enabled and currently active on a CPU, update the @@ -3238,8 +3238,6 @@ static u64 perf_event_read(struct perf_event *event) update_event_times(event); raw_spin_unlock_irqrestore(ctx-lock, flags); } - - return perf_event_count(event); } /* @@ -3751,14 +3749,18 @@ u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) *running = 0; mutex_lock(event-child_mutex); - total += perf_event_read(event); + + perf_event_read(event); + total += perf_event_count(event); + *enabled += event-total_time_enabled + atomic64_read(event-child_total_time_enabled); *running += event-total_time_running + atomic64_read(event-child_total_time_running); list_for_each_entry(child, event-child_list, child_list) { - total += perf_event_read(child); + perf_event_read(child); + total += perf_event_count(child); *enabled += child-total_time_enabled; *running += child-total_time_running; } @@ -3918,7 +3920,7 @@ static unsigned int 
perf_poll(struct file *file, poll_table *wait) static void _perf_event_reset(struct perf_event *event) { - (void)perf_event_read(event); + perf_event_read(event); local64_set(event-count, 0); perf_event_update_userpage(event); } -- 1.7.9.5 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v5 3/8] perf: Rename perf_event_read_{one, group}, perf_read_hw
From: Peter Zijlstra (Intel) pet...@infradead.org In order to free up the perf_event_read_group() name: s/perf_event_read_\(one\|group\)/perf_read_\1/g s/perf_read_hw/__perf_read/g Signed-off-by: Peter Zijlstra (Intel) pet...@infradead.org --- kernel/events/core.c | 14 +++--- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index f9ca8cb..02095f4 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3675,7 +3675,7 @@ static void put_event(struct perf_event *event) * see the comment there. * * 2) there is a lock-inversion with mmap_sem through -* perf_event_read_group(), which takes faults while +* perf_read_group(), which takes faults while * holding ctx-mutex, however this is called after * the last filedesc died, so there is no possibility * to trigger the AB-BA case. @@ -3770,7 +3770,7 @@ u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) } EXPORT_SYMBOL_GPL(perf_event_read_value); -static int perf_event_read_group(struct perf_event *event, +static int perf_read_group(struct perf_event *event, u64 read_format, char __user *buf) { struct perf_event *leader = event-group_leader, *sub; @@ -3818,7 +3818,7 @@ static int perf_event_read_group(struct perf_event *event, return ret; } -static int perf_event_read_one(struct perf_event *event, +static int perf_read_one(struct perf_event *event, u64 read_format, char __user *buf) { u64 enabled, running; @@ -3856,7 +3856,7 @@ static bool is_event_hup(struct perf_event *event) * Read the performance event - simple non blocking version for now */ static ssize_t -perf_read_hw(struct perf_event *event, char __user *buf, size_t count) +__perf_read(struct perf_event *event, char __user *buf, size_t count) { u64 read_format = event-attr.read_format; int ret; @@ -3874,9 +3874,9 @@ perf_read_hw(struct perf_event *event, char __user *buf, size_t count) WARN_ON_ONCE(event-ctx-parent_ctx); if (read_format PERF_FORMAT_GROUP) - ret = 
perf_event_read_group(event, read_format, buf); + ret = perf_read_group(event, read_format, buf); else - ret = perf_event_read_one(event, read_format, buf); + ret = perf_read_one(event, read_format, buf); return ret; } @@ -3889,7 +3889,7 @@ perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) int ret; ctx = perf_event_ctx_lock(event); - ret = perf_read_hw(event, buf, count); + ret = __perf_read(event, buf, count); perf_event_ctx_unlock(event, ctx); return ret; -- 1.7.9.5 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v5 6/8] perf: Add return value for perf_event_read().
When we implement the ability to read several counters at once (using the PERF_PMU_TXN_READ transaction interface), perf_event_read() can fail when the 'group' parameter is true (eg: trying to read too many events at once). For now, have perf_event_read() return an integer. Ignore the return value when 'group' parameter is false. Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com --- kernel/events/core.c | 45 ++--- 1 file changed, 34 insertions(+), 11 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 2221ebe..e3ce047 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3177,6 +3177,7 @@ void perf_event_exec(void) struct perf_read_data { struct perf_event *event; bool group; + int ret; }; /* @@ -3217,6 +3218,7 @@ static void __perf_event_read(void *info) if (sub-state == PERF_EVENT_STATE_ACTIVE) sub-pmu-read(sub); } + data-ret = 0; unlock: raw_spin_unlock(ctx-lock); @@ -3230,8 +3232,10 @@ static inline u64 perf_event_count(struct perf_event *event) return __perf_event_count(event); } -static void perf_event_read(struct perf_event *event, bool group) +static int perf_event_read(struct perf_event *event, bool group) { + int ret = 0; + /* * If event is enabled and currently active on a CPU, update the * value in the event structure: @@ -3240,9 +3244,11 @@ static void perf_event_read(struct perf_event *event, bool group) struct perf_read_data data = { .event = event, .group = group, + .ret = 0, }; smp_call_function_single(event-oncpu, __perf_event_read, data, 1); + ret = data.ret; } else if (event-state == PERF_EVENT_STATE_INACTIVE) { struct perf_event_context *ctx = event-ctx; unsigned long flags; @@ -3263,6 +3269,8 @@ static void perf_event_read(struct perf_event *event, bool group) update_event_times(event); raw_spin_unlock_irqrestore(ctx-lock, flags); } + + return ret; } /* @@ -3775,7 +3783,7 @@ u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) mutex_lock(event-child_mutex); - 
perf_event_read(event, false); + (void)perf_event_read(event, false); total += perf_event_count(event); *enabled += event-total_time_enabled + @@ -3784,7 +3792,7 @@ u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) atomic64_read(event-child_total_time_running); list_for_each_entry(child, event-child_list, child_list) { - perf_event_read(child, false); + (void)perf_event_read(child, false); total += perf_event_count(child); *enabled += child-total_time_enabled; *running += child-total_time_running; @@ -3795,13 +3803,16 @@ u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) } EXPORT_SYMBOL_GPL(perf_event_read_value); -static void __perf_read_group_add(struct perf_event *leader, +static int __perf_read_group_add(struct perf_event *leader, u64 read_format, u64 *values) { struct perf_event *sub; int n = 1; /* skip @nr */ + int ret; - perf_event_read(leader, true); + ret = perf_event_read(leader, true); + if (ret) + return ret; /* * Since we co-schedule groups, {enabled,running} times of siblings @@ -3830,6 +3841,8 @@ static void __perf_read_group_add(struct perf_event *leader, if (read_format PERF_FORMAT_ID) values[n++] = primary_event_id(sub); } + + return 0; } static int perf_read_group(struct perf_event *event, @@ -3837,7 +3850,7 @@ static int perf_read_group(struct perf_event *event, { struct perf_event *leader = event-group_leader, *child; struct perf_event_context *ctx = leader-ctx; - int ret = event-read_size; + int ret; u64 *values; lockdep_assert_held(ctx-mutex); @@ -3854,17 +3867,27 @@ static int perf_read_group(struct perf_event *event, */ mutex_lock(leader-child_mutex); - __perf_read_group_add(leader, read_format, values); - list_for_each_entry(child, leader-child_list, child_list) - __perf_read_group_add(child, read_format, values); + ret = __perf_read_group_add(leader, read_format, values); + if (ret) + goto unlock; + + list_for_each_entry(child, leader-child_list, child_list) { + ret = 
__perf_read_group_add(child, read_format, values); + if (ret
[PATCH v5 8/8] powerpc/perf/hv-24x7: Use PERF_PMU_TXN_READ interface
The 24x7 counters in Powerpc allow monitoring a large number of counters simultaneously. They also allow reading several counters in a single HCALL so we can get a more consistent snapshot of the system. Use the PMU's transaction interface to monitor and read several event counters at once. The idea is that users can group several 24x7 events into a single group of events. We use the following logic to submit the group of events to the PMU and read the values: pmu-start_txn()// Initialize before first event for each event in group pmu-read(event); // Queue each event to be read pmu-commit_txn() // Read/update all queued counters The -commit_txn() also updates the event counts in the respective perf_event objects. The perf subsystem can then directly get the event counts from the perf_event and can avoid submitting a new -read() request to the PMU. Thanks to input from Peter Zijlstra. Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com --- Changelog[v3] - [Peter Zijlstra] Save the transaction state in -start_txn() and remove the flags parameter from -commit_txn() and -cancel_txn(). --- arch/powerpc/perf/hv-24x7.c | 166 ++- 1 file changed, 164 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index 4d1a8d1..c4eee39 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -142,6 +142,15 @@ static struct attribute_group event_long_desc_group = { static struct kmem_cache *hv_page_cache; +DEFINE_PER_CPU(int, hv_24x7_txn_flags); +DEFINE_PER_CPU(int, hv_24x7_txn_err); + +struct hv_24x7_hw { + struct perf_event *events[255]; +}; + +DEFINE_PER_CPU(struct hv_24x7_hw, hv_24x7_hw); + /* * request_buffer and result_buffer are not required to be 4k aligned, * but are not allowed to cross any 4k boundary.
Aligning them to 4k is @@ -1233,9 +1242,48 @@ static void update_event_count(struct perf_event *event, u64 now) static void h_24x7_event_read(struct perf_event *event) { u64 now; + struct hv_24x7_request_buffer *request_buffer; + struct hv_24x7_hw *h24x7hw; + int txn_flags; + + txn_flags = __this_cpu_read(hv_24x7_txn_flags); + + /* +* If in a READ transaction, add this counter to the list of +* counters to read during the next HCALL (i.e commit_txn()). +* If not in a READ transaction, go ahead and make the HCALL +* to read this counter by itself. +*/ + + if (txn_flags PERF_PMU_TXN_READ) { + int i; + int ret; - now = h_24x7_get_value(event); - update_event_count(event, now); + if (__this_cpu_read(hv_24x7_txn_err)) + return; + + request_buffer = (void *)get_cpu_var(hv_24x7_reqb); + + ret = add_event_to_24x7_request(event, request_buffer); + if (ret) { + __this_cpu_write(hv_24x7_txn_err, ret); + } else { + /* +* Assoicate the event with the HCALL request index, +* so -commit_txn() can quickly find/update count. +*/ + i = request_buffer-num_requests - 1; + + h24x7hw = get_cpu_var(hv_24x7_hw); + h24x7hw-events[i] = event; + put_cpu_var(h24x7hw); + } + + put_cpu_var(hv_24x7_reqb); + } else { + now = h_24x7_get_value(event); + update_event_count(event, now); + } } static void h_24x7_event_start(struct perf_event *event, int flags) @@ -1257,6 +1305,117 @@ static int h_24x7_event_add(struct perf_event *event, int flags) return 0; } +/* + * 24x7 counters only support READ transactions. They are + * always counting and dont need/support ADD transactions. + * Cache the flags, but otherwise ignore transactions that + * are not PERF_PMU_TXN_READ. 
+ */ +static void h_24x7_event_start_txn(struct pmu *pmu, int flags) +{ + struct hv_24x7_request_buffer *request_buffer; + struct hv_24x7_data_result_buffer *result_buffer; + + /* We should not be called if we are already in a txn */ + WARN_ON_ONCE(__this_cpu_read(hv_24x7_txn_flags)); + + __this_cpu_write(hv_24x7_txn_flags, flags); + if (flags ~PERF_PMU_TXN_READ) + return; + + request_buffer = (void *)get_cpu_var(hv_24x7_reqb); + result_buffer = (void *)get_cpu_var(hv_24x7_resb); + + init_24x7_request(request_buffer, result_buffer); + + put_cpu_var(hv_24x7_resb); + put_cpu_var(hv_24x7_reqb); +} + +/* + * Clean up transaction state. + * + * NOTE: Ignore state of request and result buffers for now. + * We will initialize them during the next read/txn. + */ +static void
[PATCH v5 4/8] perf: Add group reads to perf_event_read()
From: Peter Zijlstra pet...@infradead.org Enable perf_event_read() to update entire groups at once, this will be useful for read transactions. Cc: Ingo Molnar mi...@redhat.com Cc: Arnaldo Carvalho de Melo a...@kernel.org Cc: Michael Ellerman m...@ellerman.id.au Cc: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com Signed-off-by: Peter Zijlstra (Intel) pet...@infradead.org Link: http://lkml.kernel.org/r/20150723080435.ge25...@twins.programming.kicks-ass.net --- kernel/events/core.c | 39 --- 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 02095f4..31ec842 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3174,12 +3174,18 @@ void perf_event_exec(void) rcu_read_unlock(); } +struct perf_read_data { + struct perf_event *event; + bool group; +}; + /* * Cross CPU call to read the hardware event */ static void __perf_event_read(void *info) { - struct perf_event *event = info; + struct perf_read_data *data = info; + struct perf_event *sub, *event = data-event; struct perf_event_context *ctx = event-ctx; struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); @@ -3198,9 +3204,21 @@ static void __perf_event_read(void *info) update_context_time(ctx); update_cgrp_time_from_event(event); } + update_event_times(event); if (event-state == PERF_EVENT_STATE_ACTIVE) event-pmu-read(event); + + if (!data-group) + goto unlock; + + list_for_each_entry(sub, event-sibling_list, group_entry) { + update_event_times(sub); + if (sub-state == PERF_EVENT_STATE_ACTIVE) + sub-pmu-read(sub); + } + +unlock: raw_spin_unlock(ctx-lock); } @@ -3212,15 +3230,19 @@ static inline u64 perf_event_count(struct perf_event *event) return __perf_event_count(event); } -static void perf_event_read(struct perf_event *event) +static void perf_event_read(struct perf_event *event, bool group) { /* * If event is enabled and currently active on a CPU, update the * value in the event structure: */ if (event-state == PERF_EVENT_STATE_ACTIVE) { + 
struct perf_read_data data = { + .event = event, + .group = group, + }; smp_call_function_single(event-oncpu, -__perf_event_read, event, 1); +__perf_event_read, data, 1); } else if (event-state == PERF_EVENT_STATE_INACTIVE) { struct perf_event_context *ctx = event-ctx; unsigned long flags; @@ -3235,7 +3257,10 @@ static void perf_event_read(struct perf_event *event) update_context_time(ctx); update_cgrp_time_from_event(event); } - update_event_times(event); + if (group) + update_group_times(event); + else + update_event_times(event); raw_spin_unlock_irqrestore(ctx-lock, flags); } } @@ -3750,7 +3775,7 @@ u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) mutex_lock(event-child_mutex); - perf_event_read(event); + perf_event_read(event, false); total += perf_event_count(event); *enabled += event-total_time_enabled + @@ -3759,7 +3784,7 @@ u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) atomic64_read(event-child_total_time_running); list_for_each_entry(child, event-child_list, child_list) { - perf_event_read(child); + perf_event_read(child, false); total += perf_event_count(child); *enabled += child-total_time_enabled; *running += child-total_time_running; @@ -3920,7 +3945,7 @@ static unsigned int perf_poll(struct file *file, poll_table *wait) static void _perf_event_reset(struct perf_event *event) { - perf_event_read(event); + perf_event_read(event, false); local64_set(event-count, 0); perf_event_update_userpage(event); } -- 1.7.9.5 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v5 7/8] Define PERF_PMU_TXN_READ interface
Define a new PERF_PMU_TXN_READ interface to read a group of counters at once. pmu-start_txn()// Initialize before first event for each event in group pmu-read(event); // Queue each event to be read rc = pmu-commit_txn() // Read/update all queued counters Note that we use this interface with all PMUs. PMUs that implement this interface use the -read() operation to _queue_ the counters to be read and use -commit_txn() to actually read all the queued counters at once. PMUs that don't implement PERF_PMU_TXN_READ ignore -start_txn() and -commit_txn() and continue to read counters one at a time. Thanks to input from Peter Zijlstra. Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com --- Changelog[v4] - [Peter Zijlstra] Add lockdep_assert_held() in perf_event_read_group(). Make sure the entire transaction happens on the same CPU. --- include/linux/perf_event.h |1 + kernel/events/core.c | 24 +++- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 44bf05f..da307ad 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -169,6 +169,7 @@ struct perf_event; #define PERF_EVENT_TXN 0x1 #define PERF_PMU_TXN_ADD 0x1 /* txn to add/schedule event on PMU */ +#define PERF_PMU_TXN_READ 0x2 /* txn to read event group from PMU */ /** * pmu::capabilities flags diff --git a/kernel/events/core.c b/kernel/events/core.c index e3ce047..fde2f43 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3189,6 +3189,7 @@ static void __perf_event_read(void *info) struct perf_event *sub, *event = data-event; struct perf_event_context *ctx = event-ctx; struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); + struct pmu *pmu = event-pmu; /* * If this is a task context, we need to check whether it is @@ -3207,18 +3208,31 @@ static void __perf_event_read(void *info) } update_event_times(event); - if (event-state == PERF_EVENT_STATE_ACTIVE) - event-pmu-read(event); + if (event-state != 
PERF_EVENT_STATE_ACTIVE) + goto unlock; - if (!data-group) + if (!data-group) { + pmu-read(event); + data-ret = 0; goto unlock; + } + + pmu-start_txn(pmu, PERF_PMU_TXN_READ); + + pmu-read(event); list_for_each_entry(sub, event-sibling_list, group_entry) { update_event_times(sub); - if (sub-state == PERF_EVENT_STATE_ACTIVE) + if (sub-state == PERF_EVENT_STATE_ACTIVE) { + /* +* Use sibling's PMU rather than @event's since +* sibling could be on different (eg: software) PMU. +*/ sub-pmu-read(sub); + } } - data-ret = 0; + + data-ret = pmu-commit_txn(pmu); unlock: raw_spin_unlock(ctx-lock); -- 1.7.9.5 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v5 5/8] perf: Invert perf_read_group() loops
From: Peter Zijlstra pet...@infradead.org In order to enable the use of perf_event_read(.group = true), we need to invert the sibling-child loop nesting of perf_read_group(). Currently we iterate the child list for each sibling, this precludes using group reads. Flip things around so we iterate each group for each child. Signed-off-by: Peter Zijlstra (Intel) pet...@infradead.org Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com --- Changes to Peter's patch: - Add GFP_KERNEL to kzalloc(). - Pass in address of counter to atomic_read(). - Return event-size rather than leader-size (perf_read_group()) - Keep chkpatch happy. --- kernel/events/core.c | 85 -- 1 file changed, 55 insertions(+), 30 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 31ec842..2221ebe 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3795,50 +3795,75 @@ u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) } EXPORT_SYMBOL_GPL(perf_event_read_value); -static int perf_read_group(struct perf_event *event, - u64 read_format, char __user *buf) +static void __perf_read_group_add(struct perf_event *leader, + u64 read_format, u64 *values) { - struct perf_event *leader = event-group_leader, *sub; - struct perf_event_context *ctx = leader-ctx; - int n = 0, size = 0, ret; - u64 count, enabled, running; - u64 values[5]; + struct perf_event *sub; + int n = 1; /* skip @nr */ - lockdep_assert_held(ctx-mutex); + perf_event_read(leader, true); + + /* +* Since we co-schedule groups, {enabled,running} times of siblings +* will be identical to those of the leader, so we only publish one +* set. 
+*/ + if (read_format PERF_FORMAT_TOTAL_TIME_ENABLED) { + values[n++] += leader-total_time_enabled + + atomic64_read(leader-child_total_time_enabled); + } - count = perf_event_read_value(leader, enabled, running); + if (read_format PERF_FORMAT_TOTAL_TIME_RUNNING) { + values[n++] += leader-total_time_running + + atomic64_read(leader-child_total_time_running); + } - values[n++] = 1 + leader-nr_siblings; - if (read_format PERF_FORMAT_TOTAL_TIME_ENABLED) - values[n++] = enabled; - if (read_format PERF_FORMAT_TOTAL_TIME_RUNNING) - values[n++] = running; - values[n++] = count; + /* +* Write {count,id} tuples for every sibling. +*/ + values[n++] += perf_event_count(leader); if (read_format PERF_FORMAT_ID) values[n++] = primary_event_id(leader); - size = n * sizeof(u64); + list_for_each_entry(sub, leader-sibling_list, group_entry) { + values[n++] += perf_event_count(sub); + if (read_format PERF_FORMAT_ID) + values[n++] = primary_event_id(sub); + } +} - if (copy_to_user(buf, values, size)) - return -EFAULT; +static int perf_read_group(struct perf_event *event, + u64 read_format, char __user *buf) +{ + struct perf_event *leader = event-group_leader, *child; + struct perf_event_context *ctx = leader-ctx; + int ret = event-read_size; + u64 *values; - ret = size; + lockdep_assert_held(ctx-mutex); - list_for_each_entry(sub, leader-sibling_list, group_entry) { - n = 0; + values = kzalloc(event-read_size, GFP_KERNEL); + if (!values) + return -ENOMEM; - values[n++] = perf_event_read_value(sub, enabled, running); - if (read_format PERF_FORMAT_ID) - values[n++] = primary_event_id(sub); + values[0] = 1 + leader-nr_siblings; + + /* +* By locking the child_mutex of the leader we effectively +* lock the child list of all siblings.. XXX explain how. 
+*/ + mutex_lock(leader-child_mutex); - size = n * sizeof(u64); + __perf_read_group_add(leader, read_format, values); + list_for_each_entry(child, leader-child_list, child_list) + __perf_read_group_add(child, read_format, values); - if (copy_to_user(buf + ret, values, size)) { - return -EFAULT; - } + mutex_unlock(leader-child_mutex); - ret += size; - } + if (copy_to_user(buf, values, event-read_size)) + ret = -EFAULT; + + kfree(values); return ret; } -- 1.7.9.5 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 09/10] Define PERF_PMU_TXN_READ interface
Peter Zijlstra [pet...@infradead.org] wrote: | On Tue, Aug 11, 2015 at 09:14:00PM -0700, Sukadev Bhattiprolu wrote: | | +static void __perf_read_group_add(struct perf_event *leader, u64 read_format, u64 *values) | | { | | + struct perf_event *sub; | | + int n = 1; /* skip @nr */ | | This n = 1 is to skip over the values[0] = 1 + nr_siblings in the | caller. | | Anyway, in __perf_read_group_add() we always start with n = 1, however | ... | | | | + perf_event_read(leader, true); | | + | | + /* | | + * Since we co-schedule groups, {enabled,running} times of siblings | | + * will be identical to those of the leader, so we only publish one | | + * set. | | + */ | | + if (read_format PERF_FORMAT_TOTAL_TIME_ENABLED) { | | + values[n++] += leader-total_time_enabled + | | + atomic64_read(leader-child_total_time_enabled); | | Note how this is an in-place addition, Ah, yes, Sorry I missed that. It make sense now and my tests seem to be running fine. | | | + } | | | | + if (read_format PERF_FORMAT_TOTAL_TIME_RUNNING) { | | + values[n++] += leader-total_time_running + | | + atomic64_read(leader-child_total_time_running); | | and here, | | | + } | | | | + /* | | + * Write {count,id} tuples for every sibling. | | + */ | | + values[n++] += perf_event_count(leader); | | and here, | | | | if (read_format PERF_FORMAT_ID) | | values[n++] = primary_event_id(leader); | | and this will always assign the same value. 
| | | + list_for_each_entry(sub, leader-sibling_list, group_entry) { | | + values[n++] += perf_event_count(sub); | | + if (read_format PERF_FORMAT_ID) | | + values[n++] = primary_event_id(sub); | | Same for these, therefore, | | | + } | | +} | | | | +static int perf_read_group(struct perf_event *event, | | +u64 read_format, char __user *buf) | | +{ | | + struct perf_event *leader = event-group_leader, *child; | | + struct perf_event_context *ctx = leader-ctx; | | + int ret = leader-read_size; One other question, We return leader-read_size but allocate/copy_to_user the sibling's event-read_size. We consistently use read_format from the 'event' being read, rather than its 'group_leader', so we are ok in terms of what we copy into values[] for each event in the group. But, can the leader's read_format (and hence its read_size) differ from its sibling's read_size? If so, in the current code, we return the event's read_size but in the new code, we return the leader's read_size. | | + u64 *values; | | | | + lockdep_assert_held(ctx-mutex); | | | | + values = kzalloc(event-read_size); | | + if (!values) | | + return -ENOMEM; | | | | + values[0] = 1 + leader-nr_siblings; | | | | + /* | | + * By locking the child_mutex of the leader we effectively | | + * lock the child list of all siblings.. XXX explain how. | | + */ | | + mutex_lock(leader-child_mutex); | | | | + __perf_read_group_add(leader, read_format, values); | | ... we don't copy_to_user() here, | | | + list_for_each_entry(child, leader-child_list, child_list) | | + __perf_read_group_add(child, read_format, values); | | so won't we overwrite the values[], if we always start at n = 1 | in __perf_read_group_add()? | | yes and no, we have to re-iterate the same values for each child as they | all have the same group, but we add the time and count fields, we do not | overwrite. 
The _add() suffix was supposed to be a hint ;-) | | | + mutex_unlock(leader-child_mutex); | | + | | + if (copy_to_user(buf, values, event-read_size)) | | + ret = -EFAULT; | | + | | + kfree(values); | | | | return ret; | | } | | Where previously we would iterate the group and for each member | iterate/sum all the child values together before copying the value out, | we now, because we need to read groups together, need to first iterate | the child list and sum whole groups. ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH V15 00/19] perf, tools: Add support for PMU events in JSON format
Sukadev Bhattiprolu [suka...@linux.vnet.ibm.com] wrote: | CPUs support a large number of performance monitoring events (PMU events) | and often these events are very specific to an architecture/model of the | CPU. To use most of these PMU events with perf, we currently have to identify | them by their raw codes: | | perf stat -e r100f2 sleep 1 | | This patchset allows architectures to specify these PMU events in JSON | files located in 'tools/perf/pmu-events/arch/' of the mainline tree. | The events from the JSON files for the architecture are then built into | the perf binary. | | At run time, perf identifies the specific set of events for the CPU and | creates event aliases. These aliases allow users to specify events by | name as: | | perf stat -e pm_1plus_ppc_cmpl sleep 1 | | The file, 'tools/perf/pmu-events/README' in [PATCH 19/19] gives more | details. | | Note: | - All known events tables for the architecture are included in the | perf binary. | | - For architectures that don't have any JSON files, an empty mapping | table is created and they should continue to build) | | Thanks to input from Andi Kleen, Jiri Olsa, Namhyung Kim and Ingo Molnar. | | These patches are available from: | | https://github.com:sukadev/linux.git | | Branch Description | -- | json-v15Source Code only | json-files-5x86 and Powerpc datafiles only | json-v15-with-data Both code and data (build/test) | | NOTE: Only source code patches (i.e those in json-v15) are being emailed. | Please pull the data files from the json-files-5 branch. Ingo, Arnaldo, Any updates on this patchset? They have been reviewed/acked by Jiri and we have not received any comments since then. Is there a chance this can make it into 4.3? Sukadev ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 09/10] Define PERF_PMU_TXN_READ interface
Peter Zijlstra [pet...@infradead.org] wrote: | On Sun, Jul 26, 2015 at 10:40:37PM -0700, Sukadev Bhattiprolu wrote: | @@ -3743,7 +3762,13 @@ static u64 perf_event_aggregate(struct perf_event *event, u64 *enabled, | lockdep_assert_held(event-child_mutex); | | list_for_each_entry(child, event-child_list, child_list) { | +#if 0 | + /* | +* TODO: Do we need this read() for group events on PMUs that | +* don't implement PERF_PMU_TXN_READ transactions? | +*/ | (void)perf_event_read(child, false); | +#endif | total += perf_event_count(child); | *enabled += child-total_time_enabled; | *running += child-total_time_running; | | Aw gawd, I've been an idiot!! | | I just realized this is a _CHILD_ loop, not a _SIBLING_ loop !! | | We need to flip the loops in perf_read_group(), find attached two | patches that go on top of 1,2,4. | | After this you can add the perf_event_read() return value (just fold | patches 6,8) after which you can do patch 10 (which has a broken | Subject fwiw). Thanks for the patches. I am building and testing, but have a question on the second patch below: snip | Subject: perf: Invert perf_read_group() loops | From: Peter Zijlstra pet...@infradead.org | Date: Thu Aug 6 13:41:13 CEST 2015 | | In order to enable the use of perf_event_read(.group = true), we need | to invert the sibling-child loop nesting of perf_read_group(). | | Currently we iterate the child list for each sibling, this precludes | using group reads. Flip things around so we iterate each group for | each child. 
| | Signed-off-by: Peter Zijlstra (Intel) pet...@infradead.org | --- | kernel/events/core.c | 84 --- | 1 file changed, 54 insertions(+), 30 deletions(-) | | --- a/kernel/events/core.c | +++ b/kernel/events/core.c | @@ -3809,50 +3809,74 @@ u64 perf_event_read_value(struct perf_ev | } | EXPORT_SYMBOL_GPL(perf_event_read_value); | | -static int perf_read_group(struct perf_event *event, | -u64 read_format, char __user *buf) | +static void __perf_read_group_add(struct perf_event *leader, u64 read_format, u64 *values) | { | - struct perf_event *leader = event-group_leader, *sub; | - struct perf_event_context *ctx = leader-ctx; | - int n = 0, size = 0, ret; | - u64 count, enabled, running; | - u64 values[5]; | + struct perf_event *sub; | + int n = 1; /* skip @nr */ This n = 1 is to skip over the values[0] = 1 + nr_siblings in the caller. Anyway, in __perf_read_group_add() we always start with n = 1, however ... | | - lockdep_assert_held(ctx-mutex); | + perf_event_read(leader, true); | + | + /* | + * Since we co-schedule groups, {enabled,running} times of siblings | + * will be identical to those of the leader, so we only publish one | + * set. | + */ | + if (read_format PERF_FORMAT_TOTAL_TIME_ENABLED) { | + values[n++] += leader-total_time_enabled + | + atomic64_read(leader-child_total_time_enabled); | + } | | - count = perf_event_read_value(leader, enabled, running); | + if (read_format PERF_FORMAT_TOTAL_TIME_RUNNING) { | + values[n++] += leader-total_time_running + | + atomic64_read(leader-child_total_time_running); | + } | | - values[n++] = 1 + leader-nr_siblings; | - if (read_format PERF_FORMAT_TOTAL_TIME_ENABLED) | - values[n++] = enabled; | - if (read_format PERF_FORMAT_TOTAL_TIME_RUNNING) | - values[n++] = running; | - values[n++] = count; | + /* | + * Write {count,id} tuples for every sibling. 
| + */ | + values[n++] += perf_event_count(leader); | if (read_format PERF_FORMAT_ID) | values[n++] = primary_event_id(leader); | | - size = n * sizeof(u64); | + list_for_each_entry(sub, leader-sibling_list, group_entry) { | + values[n++] += perf_event_count(sub); | + if (read_format PERF_FORMAT_ID) | + values[n++] = primary_event_id(sub); | + } | +} | | - if (copy_to_user(buf, values, size)) | - return -EFAULT; | +static int perf_read_group(struct perf_event *event, | +u64 read_format, char __user *buf) | +{ | + struct perf_event *leader = event-group_leader, *child; | + struct perf_event_context *ctx = leader-ctx; | + int ret = leader-read_size; | + u64 *values; | | - ret = size; | + lockdep_assert_held(ctx-mutex); | | - list_for_each_entry(sub, leader-sibling_list, group_entry) { | - n = 0; | + values = kzalloc(event-read_size); | + if (!values) | + return -ENOMEM; | | - values[n++] = perf_event_read_value(sub, enabled, running); | - if (read_format PERF_FORMAT_ID
[PATCH v4 0/10] Implement group-read of events using txn interface
Unlike normal hardware PMCs, the 24x7 counters in Power8 are stored in memory and accessed via a hypervisor call (HCALL). A major aspect of the HCALL is that it allows retireving _several_ counters at once (unlike regular PMCs, which are read one at a time). By reading several counters at once, we can get a more consistent snapshot of the system. This patchset extends the transaction interface to accomplish submitting several events to the PMU and have the PMU read them all at once. User is expected to submit the set of events they want to read as an event group. In the kernel, we submit each event to the PMU using the following logic (from Peter Zijlstra). pmu-start_txn(pmu, PMU_TXN_READ); leader-read(); for_each_sibling() sibling-read(); pmu-commit_txn(); where: - the -read()s queue events to be submitted to the hypervisor, and, - the -commit_txn() issues the HCALL, retrieves the result and updates the event count. Architectures/PMUs that don't need/implement PMU_TXN_READ type of transactions, simply ignore the -start_txn() and -commit_txn() and continue to read the counters one at a time in the -read() call. Compile/touch tested on x86. Need help testing on s390 and Sparc. Thanks to Peter Zijlstra for his input/code. Changelog[v4] - Ensure all the transactions operations happen on the same CPU so PMUs can use per-CPU buffers for the transaction. - Add lockdep assert and fix a locking issue in perf_read_group(). Changelog [v3] - Simple changes/reorg of patchset to split/rename functions - [Peter Zijlstra] Save the transaction flags in -start_txn() and drop the flags parameter from -commit_txn() and -cancel_txn(). - [Peter Zijlstra] The nop txn interfaces don't need to disable/enable PMU for PERF_PMU_TXN_READ transactions. Changelog [v2] - Use the transaction interface unconditionally to avoid special-case code. Architectures/PMUs that don't need the READ transaction types simply ignore the -start_txn() and -commit_txn() calls. 
Peter Zijlstra (Intel) (1): perf: Rename perf_event_read_{one,group}, perf_read_hw Sukadev Bhattiprolu (9): perf: Add a flags parameter to pmu txn interfaces perf: Split perf_event_read() and perf_event_count() perf: Define perf_event_aggregate() perf: Unroll perf_event_read_value() in perf_read_group() perf: Add return value for perf_event_read(). perf: Add group parameter to perf_event_read() perf: Add return value to __perf_event_read() Define PERF_PMU_TXN_READ interface powerpc/perf/hv-24x7: Use PMU_TXN_READ interface arch/powerpc/perf/core-book3s.c | 25 +- arch/powerpc/perf/hv-24x7.c | 166 - arch/s390/kernel/perf_cpum_cf.c | 24 +- arch/sparc/kernel/perf_event.c | 19 - arch/x86/kernel/cpu/perf_event.c | 27 +- arch/x86/kernel/cpu/perf_event.h |1 + include/linux/perf_event.h | 15 +++- kernel/events/core.c | 167 +++--- 8 files changed, 403 insertions(+), 41 deletions(-) -- 1.7.9.5 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 03/10] perf: Define perf_event_aggregate()
Move the part of perf_event_read_value() that aggregates the event counts and event times into a new function, perf_event_aggregate(). This would allow us to call perf_event_aggregate() independently. Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com --- Changelog[v4] [Peter Zijlstra] Add missing lockdep_assert(). Rename perf_event_compute() (to perf_event_aggregate()). Changelog[v3] Rather than move perf_event_read() into callers and then rename, just move the computations into a separate function (redesign to address comment from Peter Zijlstra). --- kernel/events/core.c | 39 ++- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index f9ca8cb..97619ed 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3704,6 +3704,31 @@ static int perf_release(struct inode *inode, struct file *file) return 0; } +static u64 perf_event_aggregate(struct perf_event *event, u64 *enabled, + u64 *running) +{ + struct perf_event *child; + u64 total; + + total = perf_event_count(event); + + *enabled += event-total_time_enabled + + atomic64_read(event-child_total_time_enabled); + *running += event-total_time_running + + atomic64_read(event-child_total_time_running); + + lockdep_assert_held(event-child_mutex); + + list_for_each_entry(child, event-child_list, child_list) { + perf_event_read(child); + total += perf_event_count(child); + *enabled += child-total_time_enabled; + *running += child-total_time_running; + } + + return total; +} + /* * Remove all orphanes events from the context. 
*/ @@ -3742,7 +3767,6 @@ static void orphans_remove_work(struct work_struct *work) u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) { - struct perf_event *child; u64 total = 0; *enabled = 0; @@ -3751,19 +3775,8 @@ u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) mutex_lock(&event->child_mutex); perf_event_read(event); - total += perf_event_count(event); - - *enabled += event->total_time_enabled + - atomic64_read(&event->child_total_time_enabled); - *running += event->total_time_running + - atomic64_read(&event->child_total_time_running); + total = perf_event_aggregate(event, enabled, running); - list_for_each_entry(child, &event->child_list, child_list) { - perf_event_read(child); - total += perf_event_count(child); - *enabled += child->total_time_enabled; - *running += child->total_time_running; - } mutex_unlock(&event->child_mutex); return total; -- 1.7.9.5 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 06/10] perf: Add return value for perf_event_read().
Add a return value to perf_event_read(). The return value will be needed later in perf_read_group() implements ability to read several counters in a PERF_PMU_TXN_READ transaction. Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com --- kernel/events/core.c | 19 +-- 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 0ce3012..21a55d1 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3212,7 +3212,7 @@ static inline u64 perf_event_count(struct perf_event *event) return __perf_event_count(event); } -static void perf_event_read(struct perf_event *event) +static int perf_event_read(struct perf_event *event) { /* * If event is enabled and currently active on a CPU, update the @@ -3238,6 +3238,8 @@ static void perf_event_read(struct perf_event *event) update_event_times(event); raw_spin_unlock_irqrestore(ctx-lock, flags); } + + return 0; } /* @@ -3720,7 +3722,7 @@ static u64 perf_event_aggregate(struct perf_event *event, u64 *enabled, lockdep_assert_held(event-child_mutex); list_for_each_entry(child, event-child_list, child_list) { - perf_event_read(child); + (void)perf_event_read(child); total += perf_event_count(child); *enabled += child-total_time_enabled; *running += child-total_time_running; @@ -3774,7 +3776,7 @@ u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) mutex_lock(event-child_mutex); - perf_event_read(event); + (void)perf_event_read(event); total = perf_event_aggregate(event, enabled, running); mutex_unlock(event-child_mutex); @@ -3798,7 +3800,12 @@ static int perf_read_group(struct perf_event *event, mutex_lock(leader-child_mutex); - perf_event_read(leader); + ret = perf_event_read(leader); + if (ret) { + mutex_unlock(leader-child_mutex); + return ret; + } + count = perf_event_aggregate(leader, enabled, running); mutex_unlock(leader-child_mutex); @@ -3824,7 +3831,7 @@ static int perf_read_group(struct perf_event *event, 
mutex_lock(&leader->child_mutex); - perf_event_read(sub); + (void)perf_event_read(sub); values[n++] = perf_event_aggregate(sub, &enabled, &running); mutex_unlock(&leader->child_mutex); @@ -3946,7 +3953,7 @@ static unsigned int perf_poll(struct file *file, poll_table *wait) static void _perf_event_reset(struct perf_event *event) { - perf_event_read(event); + (void)perf_event_read(event); local64_set(&event->count, 0); perf_event_update_userpage(event); } -- 1.7.9.5 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 05/10] perf: Unroll perf_event_read_value() in perf_read_group()
Unroll the calls to perf_event_read_value() in perf_read_group() so we can later optimize out parts we don't need for group events. Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com --- kernel/events/core.c | 17 +++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index a6bd09d..0ce3012 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3794,7 +3794,14 @@ static int perf_read_group(struct perf_event *event, lockdep_assert_held(&ctx->mutex); - count = perf_event_read_value(leader, &enabled, &running); + enabled = running = 0; + + mutex_lock(&leader->child_mutex); + + perf_event_read(leader); + count = perf_event_aggregate(leader, &enabled, &running); + + mutex_unlock(&leader->child_mutex); values[n++] = 1 + leader->nr_siblings; if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) @@ -3815,7 +3822,13 @@ static int perf_read_group(struct perf_event *event, list_for_each_entry(sub, &leader->sibling_list, group_entry) { n = 0; - values[n++] = perf_event_read_value(sub, &enabled, &running); + mutex_lock(&leader->child_mutex); + + perf_event_read(sub); + values[n++] = perf_event_aggregate(sub, &enabled, &running); + + mutex_unlock(&leader->child_mutex); + if (read_format & PERF_FORMAT_ID) values[n++] = primary_event_id(sub); -- 1.7.9.5 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 09/10] Define PERF_PMU_TXN_READ interface
Define a new PERF_PMU_TXN_READ interface to read a group of counters at once. pmu-start_txn()// Initialize before first event for each event in group pmu-read(event); // Queue each event to be read pmu-commit_txn() // Read/update all queued counters Note that we use this interface with all PMUs. PMUs that implement this interface use the -read() operation to _queue_ the counters to be read and use -commit_txn() to actually read all the queued counters at once. PMUs that don't implement PERF_PMU_TXN_READ ignore -start_txn() and -commit_txn() and continue to read counters one at a time. Thanks to input from Peter Zijlstra. Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com --- Changelog[v4] [Peter Zijlstra] Add lockdep_assert_held() in perf_event_read_group(). Make sure the entire transaction happens on the same CPU. --- include/linux/perf_event.h |1 + kernel/events/core.c | 38 +++--- 2 files changed, 32 insertions(+), 7 deletions(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 44bf05f..da307ad 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -169,6 +169,7 @@ struct perf_event; #define PERF_EVENT_TXN 0x1 #define PERF_PMU_TXN_ADD 0x1 /* txn to add/schedule event on PMU */ +#define PERF_PMU_TXN_READ 0x2 /* txn to read event group from PMU */ /** * pmu::capabilities flags diff --git a/kernel/events/core.c b/kernel/events/core.c index 951d835..b5aa92c 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3176,6 +3176,7 @@ void perf_event_exec(void) struct perf_read_data { struct perf_event *event; + bool group; int ret; }; @@ -3186,8 +3187,10 @@ static void __perf_event_read(void *info) { struct perf_read_data *data = info; struct perf_event *event = data-event; + struct perf_event *sub; struct perf_event_context *ctx = event-ctx; struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); + struct pmu *pmu = event-pmu; /* * If this is a task context, we need to check whether it is @@ -3205,10 
+3208,25 @@ static void __perf_event_read(void *info) update_cgrp_time_from_event(event); } update_event_times(event); - if (event-state == PERF_EVENT_STATE_ACTIVE) - event-pmu-read(event); + if (event-state != PERF_EVENT_STATE_ACTIVE) + goto unlock; + + if (!data-group) { + pmu-read(event); + goto unlock; + } + + pmu-start_txn(pmu, PERF_PMU_TXN_READ); - data-ret = 0; + pmu-read(event); + list_for_each_entry(sub, event-sibling_list, group_entry) { + if (sub-state == PERF_EVENT_STATE_ACTIVE) + pmu-read(sub); + } + + data-ret = pmu-commit_txn(pmu); + +unlock: raw_spin_unlock(ctx-lock); } @@ -3231,6 +3249,7 @@ static int perf_event_read(struct perf_event *event, bool group) if (event-state == PERF_EVENT_STATE_ACTIVE) { struct perf_read_data data = { .event = event, + .group = group, .ret = 0, }; @@ -3743,7 +3762,13 @@ static u64 perf_event_aggregate(struct perf_event *event, u64 *enabled, lockdep_assert_held(event-child_mutex); list_for_each_entry(child, event-child_list, child_list) { +#if 0 + /* +* TODO: Do we need this read() for group events on PMUs that +* don't implement PERF_PMU_TXN_READ transactions? +*/ (void)perf_event_read(child, false); +#endif total += perf_event_count(child); *enabled += child-total_time_enabled; *running += child-total_time_running; @@ -3821,7 +3846,7 @@ static int perf_read_group(struct perf_event *event, mutex_lock(leader-child_mutex); - ret = perf_event_read(leader); + ret = perf_event_read(leader, true); if (ret) { mutex_unlock(leader-child_mutex); return ret; @@ -3850,12 +3875,11 @@ static int perf_read_group(struct perf_event *event, list_for_each_entry(sub, leader-sibling_list, group_entry) { n = 0; - mutex_lock(leader-child_mutex); + mutex_lock(sub-child_mutex); - (void)perf_event_read(sub, false); values[n++] = perf_event_aggregate(sub, enabled, running); - mutex_unlock(leader-child_mutex); + mutex_unlock(sub-child_mutex); if (read_format PERF_FORMAT_ID) values[n++] = primary_event_id(sub); -- 1.7.9.5 ___ Linuxppc-dev mailing
[PATCH 01/10] perf: Add a flags parameter to pmu txn interfaces
Currently, the PMU interface allows reading only one counter at a time. But some PMUs like the 24x7 counters in Power, support reading several counters at once. To leveage this functionality, extend the transaction interface to support a transaction type. The first type, PERF_PMU_TXN_ADD, refers to the existing transactions, i.e. used to _schedule_ all the events on the PMU as a group. A second transaction type, PERF_PMU_TXN_READ, will be used in a follow-on patch, by the 24x7 counters to read several counters at once. Extend the transaction interfaces to the PMU to accept a 'txn_flags' parameter and use this parameter to ignore any transactions that are not of type PERF_PMU_TXN_ADD. Thanks to Peter Zijlstra for his input. Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com --- Changelog[v4] - [Peter Zijlstra] Fix an copy-paste error in power_pmu_cancel_txn(). - [Peter Zijlstra] Use __this_cpu_read() and __this_cpu_write(). Changelog[v3] - [Peter Zijlstra] Ensure the nop_txn interfaces disable/enable PMU only for TXN_ADD transactions. - [Peter Zijlstra] Cache the flags parameter in -start_txn() and drop the flags parameter from -commit_txn() and -cancel_txn(). 
--- arch/powerpc/perf/core-book3s.c | 25 - arch/s390/kernel/perf_cpum_cf.c | 24 +++- arch/sparc/kernel/perf_event.c | 19 ++- arch/x86/kernel/cpu/perf_event.c | 27 +-- arch/x86/kernel/cpu/perf_event.h |1 + include/linux/perf_event.h | 14 +++--- kernel/events/core.c | 31 --- 7 files changed, 130 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index d90893b..b18efe4 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -50,6 +50,7 @@ struct cpu_hw_events { unsigned int group_flag; int n_txn_start; + int txn_flags; /* BHRB bits */ u64 bhrb_filter;/* BHRB HW branch filter */ @@ -1586,11 +1587,19 @@ static void power_pmu_stop(struct perf_event *event, int ef_flags) * Start group events scheduling transaction * Set the flag to make pmu::enable() not perform the * schedulability test, it will be performed at commit time + * + * We only support PERF_PMU_TXN_ADD transactions. Save the + * transaction flags but otherwise ignore non-PERF_PMU_TXN_ADD + * transactions. 
*/ -static void power_pmu_start_txn(struct pmu *pmu) +static void power_pmu_start_txn(struct pmu *pmu, int txn_flags) { struct cpu_hw_events *cpuhw = this_cpu_ptr(cpu_hw_events); + cpuhw-txn_flags = txn_flags; + if (txn_flags ~PERF_PMU_TXN_ADD) + return; + perf_pmu_disable(pmu); cpuhw-group_flag |= PERF_EVENT_TXN; cpuhw-n_txn_start = cpuhw-n_events; @@ -1604,6 +1613,12 @@ static void power_pmu_start_txn(struct pmu *pmu) static void power_pmu_cancel_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw = this_cpu_ptr(cpu_hw_events); + int txn_flags; + + txn_flags = cpuhw-txn_flags; + cpuhw-txn_flags = 0; + if (txn_flags ~PERF_PMU_TXN_ADD) + return; cpuhw-group_flag = ~PERF_EVENT_TXN; perf_pmu_enable(pmu); @@ -1618,10 +1633,18 @@ static int power_pmu_commit_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw; long i, n; + int txn_flags; if (!ppmu) return -EAGAIN; + cpuhw = this_cpu_ptr(cpu_hw_events); + + txn_flags = cpuhw-txn_flags; + cpuhw-txn_flags = 0; + if (cpuhw-txn_flags ~PERF_PMU_TXN_ADD) + return 0; + n = cpuhw-n_events; if (check_excludes(cpuhw-event, cpuhw-flags, 0, n)) return -EAGAIN; diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 56fdad4..a6f9e7b 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -72,6 +72,7 @@ struct cpu_hw_events { atomic_tctr_set[CPUMF_CTR_SET_MAX]; u64 state, tx_state; unsigned intflags; + int txn_flags; }; static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .ctr_set = { @@ -82,6 +83,7 @@ static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { }, .state = 0, .flags = 0, + .txn_flags = 0, }; static int get_counter_set(u64 event) @@ -572,11 +574,19 @@ static void cpumf_pmu_del(struct perf_event *event, int flags) /* * Start group events scheduling transaction. * Set flags to perform a single test at commit time. + * + * We only support PERF_PMU_TXN_ADD transactions. 
Save the + * transaction flags but otherwise ignore non-PERF_PMU_TXN_ADD + * transactions. */ -static void
[PATCH 02/10] perf: Split perf_event_read() and perf_event_count()
perf_event_read() does two things: - call the PMU to read/update the counter value, and - compute the total count of the event and its children Not all callers need both. perf_event_reset() for instance needs the first piece but doesn't need the second. Similarly, when we implement the ability to read a group of events using the transaction interface, we would need the two pieces done independently. Break up perf_event_read() and have it just read/update the counter and have the callers compute the total count if necessary. Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com --- kernel/events/core.c | 14 -- 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 4435bf5..f9ca8cb 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3212,7 +3212,7 @@ static inline u64 perf_event_count(struct perf_event *event) return __perf_event_count(event); } -static u64 perf_event_read(struct perf_event *event) +static void perf_event_read(struct perf_event *event) { /* * If event is enabled and currently active on a CPU, update the @@ -3238,8 +3238,6 @@ static u64 perf_event_read(struct perf_event *event) update_event_times(event); raw_spin_unlock_irqrestore(ctx-lock, flags); } - - return perf_event_count(event); } /* @@ -3751,14 +3749,18 @@ u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) *running = 0; mutex_lock(event-child_mutex); - total += perf_event_read(event); + + perf_event_read(event); + total += perf_event_count(event); + *enabled += event-total_time_enabled + atomic64_read(event-child_total_time_enabled); *running += event-total_time_running + atomic64_read(event-child_total_time_running); list_for_each_entry(child, event-child_list, child_list) { - total += perf_event_read(child); + perf_event_read(child); + total += perf_event_count(child); *enabled += child-total_time_enabled; *running += child-total_time_running; } @@ -3918,7 +3920,7 @@ static unsigned int 
perf_poll(struct file *file, poll_table *wait) static void _perf_event_reset(struct perf_event *event) { - (void)perf_event_read(event); + perf_event_read(event); local64_set(&event->count, 0); perf_event_update_userpage(event); } -- 1.7.9.5 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 04/10] perf: Rename perf_event_read_{one,group}, perf_read_hw
From: Peter Zijlstra (Intel) pet...@infradead.org In order to free up the perf_event_read_group() name: s/perf_event_read_\(one\|group\)/perf_read_\1/g s/perf_read_hw/__perf_read/g Signed-off-by: Peter Zijlstra (Intel) pet...@infradead.org --- kernel/events/core.c | 14 +++--- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 97619ed..a6bd09d 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3675,7 +3675,7 @@ static void put_event(struct perf_event *event) * see the comment there. * * 2) there is a lock-inversion with mmap_sem through -* perf_event_read_group(), which takes faults while +* perf_read_group(), which takes faults while * holding ctx-mutex, however this is called after * the last filedesc died, so there is no possibility * to trigger the AB-BA case. @@ -3783,7 +3783,7 @@ u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) } EXPORT_SYMBOL_GPL(perf_event_read_value); -static int perf_event_read_group(struct perf_event *event, +static int perf_read_group(struct perf_event *event, u64 read_format, char __user *buf) { struct perf_event *leader = event-group_leader, *sub; @@ -3831,7 +3831,7 @@ static int perf_event_read_group(struct perf_event *event, return ret; } -static int perf_event_read_one(struct perf_event *event, +static int perf_read_one(struct perf_event *event, u64 read_format, char __user *buf) { u64 enabled, running; @@ -3869,7 +3869,7 @@ static bool is_event_hup(struct perf_event *event) * Read the performance event - simple non blocking version for now */ static ssize_t -perf_read_hw(struct perf_event *event, char __user *buf, size_t count) +__perf_read(struct perf_event *event, char __user *buf, size_t count) { u64 read_format = event-attr.read_format; int ret; @@ -3887,9 +3887,9 @@ perf_read_hw(struct perf_event *event, char __user *buf, size_t count) WARN_ON_ONCE(event-ctx-parent_ctx); if (read_format PERF_FORMAT_GROUP) - ret = 
perf_event_read_group(event, read_format, buf); + ret = perf_read_group(event, read_format, buf); else - ret = perf_event_read_one(event, read_format, buf); + ret = perf_read_one(event, read_format, buf); return ret; } @@ -3902,7 +3902,7 @@ perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) int ret; ctx = perf_event_ctx_lock(event); - ret = perf_read_hw(event, buf, count); + ret = __perf_read(event, buf, count); perf_event_ctx_unlock(event, ctx); return ret; -- 1.7.9.5 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 08/10] perf: Add return value to __perf_event_read()
Add a return value to __perf_event_read(). The return value will be needed later in perf_read_group() implements ability to read several counters in a PERF_PMU_TXN_READ transaction. Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com --- kernel/events/core.c | 22 +++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index f38fe0b..951d835 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3174,12 +3174,18 @@ void perf_event_exec(void) rcu_read_unlock(); } +struct perf_read_data { + struct perf_event *event; + int ret; +}; + /* * Cross CPU call to read the hardware event */ static void __perf_event_read(void *info) { - struct perf_event *event = info; + struct perf_read_data *data = info; + struct perf_event *event = data-event; struct perf_event_context *ctx = event-ctx; struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); @@ -3201,6 +3207,8 @@ static void __perf_event_read(void *info) update_event_times(event); if (event-state == PERF_EVENT_STATE_ACTIVE) event-pmu-read(event); + + data-ret = 0; raw_spin_unlock(ctx-lock); } @@ -3214,13 +3222,21 @@ static inline u64 perf_event_count(struct perf_event *event) static int perf_event_read(struct perf_event *event, bool group) { + int ret = 0; + /* * If event is enabled and currently active on a CPU, update the * value in the event structure: */ if (event-state == PERF_EVENT_STATE_ACTIVE) { + struct perf_read_data data = { + .event = event, + .ret = 0, + }; + smp_call_function_single(event-oncpu, -__perf_event_read, event, 1); +__perf_event_read, data, 1); + ret = data.ret; } else if (event-state == PERF_EVENT_STATE_INACTIVE) { struct perf_event_context *ctx = event-ctx; unsigned long flags; @@ -3244,7 +3260,7 @@ static int perf_event_read(struct perf_event *event, bool group) raw_spin_unlock_irqrestore(ctx-lock, flags); } - return 0; + return ret; } /* -- 1.7.9.5 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org 
https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 07/10] perf: Add group parameter to perf_event_read()
Add a 'group' parameter to perf_event_read(). It will be used (set to true) in a follow-on patch to update event times of the group. Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com --- kernel/events/core.c | 17 +++-- 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 21a55d1..f38fe0b 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3212,7 +3212,7 @@ static inline u64 perf_event_count(struct perf_event *event) return __perf_event_count(event); } -static int perf_event_read(struct perf_event *event) +static int perf_event_read(struct perf_event *event, bool group) { /* * If event is enabled and currently active on a CPU, update the @@ -3235,7 +3235,12 @@ static int perf_event_read(struct perf_event *event) update_context_time(ctx); update_cgrp_time_from_event(event); } - update_event_times(event); + + if (group) + update_group_times(event); + else + update_event_times(event); + raw_spin_unlock_irqrestore(ctx-lock, flags); } @@ -3722,7 +3727,7 @@ static u64 perf_event_aggregate(struct perf_event *event, u64 *enabled, lockdep_assert_held(event-child_mutex); list_for_each_entry(child, event-child_list, child_list) { - (void)perf_event_read(child); + (void)perf_event_read(child, false); total += perf_event_count(child); *enabled += child-total_time_enabled; *running += child-total_time_running; @@ -3776,7 +3781,7 @@ u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) mutex_lock(event-child_mutex); - (void)perf_event_read(event); + (void)perf_event_read(event, false); total = perf_event_aggregate(event, enabled, running); mutex_unlock(event-child_mutex); @@ -3831,7 +3836,7 @@ static int perf_read_group(struct perf_event *event, mutex_lock(leader-child_mutex); - (void)perf_event_read(sub); + (void)perf_event_read(sub, false); values[n++] = perf_event_aggregate(sub, enabled, running); mutex_unlock(leader-child_mutex); @@ -3953,7 +3958,7 @@ static unsigned 
int perf_poll(struct file *file, poll_table *wait) static void _perf_event_reset(struct perf_event *event) { - (void)perf_event_read(event); + (void)perf_event_read(event, false); local64_set(event-count, 0); perf_event_update_userpage(event); } -- 1.7.9.5 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 10/10] powerpc/perf/hv-24x7: Use PMU_TXN_READ interface
The 24x7 counters in Powerpc allow monitoring a large number of counters simultaneously. They also allow reading several counters in a single HCALL so we can get a more consistent snapshot of the system. Use the PMU's transaction interface to monitor and read several event counters at once. The idea is that users can group several 24x7 events into a single group of events. We use the following logic to submit the group of events to the PMU and read the values: pmu->start_txn() // Initialize before first event for each event in group pmu->read(event); // Queue each event to be read pmu->commit_txn() // Read/update all queued counters The ->commit_txn() also updates the event counts in the respective perf_event objects. The perf subsystem can then directly get the event counts from the perf_event and can avoid submitting a new ->read() request to the PMU. Thanks to input from Peter Zijlstra. Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com --- Changelog[v3] [Peter Zijlstra] Save the transaction state in ->start_txn() and remove the flags parameter from ->commit_txn() and ->cancel_txn(). --- arch/powerpc/perf/hv-24x7.c | 166 ++- 1 file changed, 164 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index 4d1a8d1..b86121c 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -142,6 +142,15 @@ static struct attribute_group event_long_desc_group = { static struct kmem_cache *hv_page_cache; +DEFINE_PER_CPU(int, hv_24x7_txn_flags); +DEFINE_PER_CPU(int, hv_24x7_txn_err); + +struct hv_24x7_hw { + struct perf_event *events[255]; +}; + +DEFINE_PER_CPU(struct hv_24x7_hw, hv_24x7_hw); + /* * request_buffer and result_buffer are not required to be 4k aligned, * but are not allowed to cross any 4k boundary. 
Aligning them to 4k is @@ -1233,9 +1242,48 @@ static void update_event_count(struct perf_event *event, u64 now) static void h_24x7_event_read(struct perf_event *event) { u64 now; + struct hv_24x7_request_buffer *request_buffer; + struct hv_24x7_hw *h24x7hw; + int txn_flags; + + txn_flags = __this_cpu_read(hv_24x7_txn_flags); + + /* +* If in a READ transaction, add this counter to the list of +* counters to read during the next HCALL (i.e commit_txn()). +* If not in a READ transaction, go ahead and make the HCALL +* to read this counter by itself. +*/ + + if (txn_flags PERF_PMU_TXN_READ) { + int i; + int ret; - now = h_24x7_get_value(event); - update_event_count(event, now); + if (__this_cpu_read(hv_24x7_txn_err)) + return; + + request_buffer = (void *)get_cpu_var(hv_24x7_reqb); + + ret = add_event_to_24x7_request(event, request_buffer); + if (ret) { + __this_cpu_write(hv_24x7_txn_err, ret); + } else { + /* +* Assoicate the event with the HCALL request index, +* so -commit_txn() can quickly find/update count. +*/ + i = request_buffer-num_requests - 1; + + h24x7hw = get_cpu_var(hv_24x7_hw); + h24x7hw-events[i] = event; + put_cpu_var(h24x7hw); + } + + put_cpu_var(hv_24x7_reqb); + } else { + now = h_24x7_get_value(event); + update_event_count(event, now); + } } static void h_24x7_event_start(struct perf_event *event, int flags) @@ -1257,6 +1305,117 @@ static int h_24x7_event_add(struct perf_event *event, int flags) return 0; } +/* + * 24x7 counters only support READ transactions. They are + * always counting and dont need/support ADD transactions. + * Cache the flags, but otherwise ignore transactions that + * are not PERF_PMU_TXN_READ. 
+ */ +static void h_24x7_event_start_txn(struct pmu *pmu, int flags) +{ + struct hv_24x7_request_buffer *request_buffer; + struct hv_24x7_data_result_buffer *result_buffer; + + /* We should not be called if we are already in a txn */ + WARN_ON_ONCE(__this_cpu_read(hv_24x7_txn_flags)); + + __this_cpu_write(hv_24x7_txn_flags, flags); + if (flags ~PERF_PMU_TXN_READ) + return; + + request_buffer = (void *)get_cpu_var(hv_24x7_reqb); + result_buffer = (void *)get_cpu_var(hv_24x7_resb); + + init_24x7_request(request_buffer, result_buffer); + + put_cpu_var(hv_24x7_resb); + put_cpu_var(hv_24x7_reqb); +} + +/* + * Clean up transaction state. + * + * NOTE: Ignore state of request and result buffers for now. + * We will initialize them during the next read/txn. + */ +static void
Re: [PATCH v3 5/8] perf: Split perf_event_read_value()
Peter Zijlstra [pet...@infradead.org] wrote: | On Tue, Jul 14, 2015 at 08:01:52PM -0700, Sukadev Bhattiprolu wrote: | Move the part of perf_event_read_value() that computes the event | counts and event times into a new function, perf_event_compute(). | | This would allow us to call perf_event_compute() independently. | | Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com | | Changelog[v3] | Rather than move perf_event_read() into callers and then | rename, just move the computations into a separate function | (redesign to address comment from Peter Zijlstra). | --- | kernel/events/core.c | 37 - | 1 file changed, 24 insertions(+), 13 deletions(-) | | diff --git a/kernel/events/core.c b/kernel/events/core.c | index 44fb89d..b1e9a42 100644 | --- a/kernel/events/core.c | +++ b/kernel/events/core.c | @@ -3704,6 +3704,29 @@ static int perf_release(struct inode *inode, struct file *file) | return 0; | } | | +static u64 perf_event_compute(struct perf_event *event, u64 *enabled, | + u64 *running) | +{ | + struct perf_event *child; | + u64 total; | + | + total = perf_event_count(event); | + | + *enabled += event-total_time_enabled + | + atomic64_read(event-child_total_time_enabled); | + *running += event-total_time_running + | + atomic64_read(event-child_total_time_running); | + | + list_for_each_entry(child, event-child_list, child_list) { | + perf_event_read(child); | | Sure we don't want that.. So if say x86 calls perf_event_read_value() the current upstream code makes the perf_event_read(child). If we remove this, then it would be a change in behavior? I have commented it out and have TODO in the latest patchset. Pls review and let me know if we should drop this read (and the TODO) of the child event. Sukadev ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH v3 7/8] perf: Define PMU_TXN_READ interface
Peter Zijlstra [pet...@infradead.org] wrote: | On Wed, Jul 22, 2015 at 04:19:16PM -0700, Sukadev Bhattiprolu wrote: | Peter Zijlstra [pet...@infradead.org] wrote: | | I've not woken up yet, and not actually fully read the email, but can | | you stuff the entire above chunk inside the IPI? | | | | I think you could then actually optimize __perf_event_read() as well, | | because all these events should be on the same context, so no point in | | calling update_*time*() for every event or so. | | | | Do you mean something like this (will move the rename to a separate | patch before posting): | | More like so.. please double check, I've not even had tea yet. Yeah, I realized I had ignored the 'event-cpu' spec. Will try this out. Thanks, Sukadev ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH v3 7/8] perf: Define PMU_TXN_READ interface
Peter Zijlstra [pet...@infradead.org] wrote: | I've not woken up yet, and not actually fully read the email, but can | you stuff the entire above chunk inside the IPI? | | I think you could then actually optimize __perf_event_read() as well, | because all these events should be on the same context, so no point in | calling update_*time*() for every event or so. | Do you mean something like this (will move the rename to a separate patch before posting): -- From e8eddb5d3877ebdb3b71213a00aaa980f4010dd0 Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com Date: Tue, 7 Jul 2015 21:45:23 -0400 Subject: [PATCH 1/1] perf: Define PMU_TXN_READ interface Define a new PERF_PMU_TXN_READ interface to read a group of counters at once. Note that we use this interface with all PMUs. PMUs that implement this interface use the -read() operation to _queue_ the counters to be read and use -commit_txn() to actually read all the queued counters at once. PMUs that don't implement PERF_PMU_TXN_READ ignore -start_txn() and -commit_txn() and continue to read counters one at a time. Thanks to input from Peter Zijlstra. Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com --- Changelog[v5] [Peter Zijlstra] Ensure the entire transaction happens on the same CPU. 
Changelog[v4] [Peter Zijlstra] Add lockdep_assert_held() in perf_event_read_group() --- include/linux/perf_event.h |1 + kernel/events/core.c | 72 +--- 2 files changed, 62 insertions(+), 11 deletions(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 44bf05f..da307ad 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -169,6 +169,7 @@ struct perf_event; #define PERF_EVENT_TXN 0x1 #define PERF_PMU_TXN_ADD 0x1 /* txn to add/schedule event on PMU */ +#define PERF_PMU_TXN_READ 0x2 /* txn to read event group from PMU */ /** * pmu::capabilities flags diff --git a/kernel/events/core.c b/kernel/events/core.c index a6bd09d..7177dd8 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3174,12 +3174,8 @@ void perf_event_exec(void) rcu_read_unlock(); } -/* - * Cross CPU call to read the hardware event - */ -static void __perf_event_read(void *info) +static void __perf_event_read(struct perf_event *event, int update_ctx) { - struct perf_event *event = info; struct perf_event_context *ctx = event-ctx; struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); @@ -3194,7 +3190,7 @@ static void __perf_event_read(void *info) return; raw_spin_lock(ctx-lock); - if (ctx-is_active) { + if (ctx-is_active update_ctx) { update_context_time(ctx); update_cgrp_time_from_event(event); } @@ -3204,6 +3200,16 @@ static void __perf_event_read(void *info) raw_spin_unlock(ctx-lock); } +/* + * Cross CPU call to read the hardware event + */ +static void __perf_event_read_ipi(void *info) +{ + struct perf_event *event = info; + + __perf_event_read(event, 1); +} + static inline u64 perf_event_count(struct perf_event *event) { if (event-pmu-count) @@ -3220,7 +3226,7 @@ static void perf_event_read(struct perf_event *event) */ if (event-state == PERF_EVENT_STATE_ACTIVE) { smp_call_function_single(event-oncpu, -__perf_event_read, event, 1); +__perf_event_read_ipi, event, 1); } else if (event-state == PERF_EVENT_STATE_INACTIVE) { struct 
perf_event_context *ctx = event-ctx; unsigned long flags; @@ -3765,6 +3771,36 @@ static void orphans_remove_work(struct work_struct *work) put_ctx(ctx); } +/* + * Use the transaction interface to read the group of events in @leader. + * PMUs like the 24x7 counters in Power, can use this to queue the events + * in the -read() operation and perform the actual read in -commit_txn. + * + * Other PMUs can ignore the -start_txn and -commit_txn and read each + * PMU directly in the -read() operation. + */ +static int perf_event_read_group(struct perf_event *leader) +{ + int ret; + struct perf_event *sub; + struct pmu *pmu; + struct perf_event_context *ctx = leader-ctx; + + lockdep_assert_held(ctx-mutex); + + pmu = leader-pmu; + + pmu-start_txn(pmu, PERF_PMU_TXN_READ); + + __perf_event_read(leader, 1); + list_for_each_entry(sub, leader-sibling_list, group_entry) + __perf_event_read(sub, 0); + + ret = pmu-commit_txn(pmu); + + return ret; +} + u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) { u64 total = 0; @@ -3794,7 +3830,17 @@ static int perf_read_group(struct perf_event *event, lockdep_assert_held(ctx-mutex); - count = perf_event_read_value(leader
Re: [PATCH v3 7/8] perf: Define PMU_TXN_READ interface
Peter Zijlstra [pet...@infradead.org] wrote: | On Tue, Jul 14, 2015 at 08:01:54PM -0700, Sukadev Bhattiprolu wrote: | +/* | + * Use the transaction interface to read the group of events in @leader. | + * PMUs like the 24x7 counters in Power, can use this to queue the events | + * in the -read() operation and perform the actual read in -commit_txn. | + * | + * Other PMUs can ignore the -start_txn and -commit_txn and read each | + * PMU directly in the -read() operation. | + */ | +static int perf_event_read_group(struct perf_event *leader) | +{ | + int ret; | + struct perf_event *sub; | + struct pmu *pmu; | + | + pmu = leader-pmu; | + | + pmu-start_txn(pmu, PERF_PMU_TXN_READ); | + | + perf_event_read(leader); | | There should be a lockdep assert with that list iteration. | | + list_for_each_entry(sub, leader-sibling_list, group_entry) | + perf_event_read(sub); | + | + ret = pmu-commit_txn(pmu); Peter, I have a situation :-) We are trying to use the following interface: start_txn(pmu, PERF_PMU_TXN_READ); perf_event_read(leader); list_for_each(sibling, leader-sibling_list, group_entry) perf_event_read(sibling) pmu-commit_txn(pmu); with the idea that the PMU driver would save the type of transaction in -start_txn() and use in -read() and -commit_txn(). But since -start_txn() and the -read() operations could happen on different CPUs (perf_event_read() uses the event-oncpu to schedule a call), the PMU driver cannot use a per-cpu variable to save the state in -start_txn(). I tried using a pmu-wide global, but that would also need us to hold a mutex to serialize access to that global. 
The problem is -start_txn() can be called from an interrupt context for the TXN_ADD transactions (I got the following backtrace during testing) mutex_lock_nested+0x504/0x520 (unreliable) h_24x7_event_start_txn+0x3c/0xd0 group_sched_in+0x70/0x230 ctx_sched_in.isra.63+0x150/0x230 __perf_install_in_context+0x1c8/0x1e0 remote_function+0x7c/0xa0 flush_smp_call_function_queue+0xb0/0x1d0 smp_ipi_demux+0x88/0xf0 icp_hv_ipi_action+0x54/0xc0 handle_irq_event_percpu+0x98/0x2b0 handle_percpu_irq+0x7c/0xc0 generic_handle_irq+0x4c/0x80 __do_irq+0x7c/0x190 call_do_irq+0x14/0x24 do_IRQ+0x8c/0x100 hardware_interrupt_common+0x168/0x180 --- interrupt: 501 at .plpar_hcall_norets+0x14/0x20 Basically stuck trying to save the txn type in -start_txn() and retrieve in -read(). Couple of options I can think of are: - having -start_txn() return a handle that should then be passed in with -read() (yuck) and -commit_txn(). - serialize the READ transaction for the PMU in perf_event_read_group() with a new pmu-txn_mutex: mutex_lock(pmu-txn_mutex); pmu-start_txn() list_for_each_entry(sub, leader-sibling_list, group_entry) perf_event_read(sub); ret = pmu-commit_txn(pmu); mutex_unlock(pmu-txn_mutex); such serialization would be ok with 24x7 counters (they are system wide counters anyway) We could maybe skip the mutex for PMUs that don't implement TXN_READ interface. or is there better way? Sukadev ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH v5 0/7]powerpc/powernv: Nest Instrumentation support
Madhavan Srinivasan [ma...@linux.vnet.ibm.com] wrote: | This patchset enables Nest Instrumentation support on powerpc. | POWER8 has per-chip Nest Intrumentation which provides various | per-chip metrics like memory, powerbus, Xlink and Alink | bandwidth. | snip | Cc: Michael Ellerman m...@ellerman.id.au | Cc: Benjamin Herrenschmidt b...@kernel.crashing.org | Cc: Paul Mackerras pau...@samba.org | Cc: Anton Blanchard an...@samba.org | Cc: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com | Cc: Anshuman Khandual khand...@linux.vnet.ibm.com | Cc: Stephane Eranian eran...@google.com | Signed-off-by: Madhavan Srinivasan ma...@linux.vnet.ibm.com Thanks for addressing my comments from earlier version. Reviewed-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH v3 3/8] perf: Add a flags parameter to pmu txn interfaces
Peter Zijlstra [pet...@infradead.org] wrote: | On Tue, Jul 14, 2015 at 08:01:50PM -0700, Sukadev Bhattiprolu wrote: | @@ -1604,6 +1613,12 @@ static void power_pmu_start_txn(struct pmu *pmu) | static void power_pmu_cancel_txn(struct pmu *pmu) | { | struct cpu_hw_events *cpuhw = this_cpu_ptr(cpu_hw_events); | + int txn_flags; | + | + txn_flags = cpuhw-txn_flags; | + cpuhw-txn_flags = 0; | + if (cpuhw-txn_flags ~PERF_PMU_TXN_ADD) | + return; | | That seems, unintentional? ;-) Argh. Thanks for catching it. Sukadev ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: BUG: perf error on syscalls for powerpc64.
Zumeng Chen [zumeng.c...@gmail.com] wrote: | 3. What I have seen in 3.14.x kernel, | == | And so far, no more difference to 4.x kernel from me about this part if | I'm right. | | *) With 1028ccf5 | | perf list|grep -i syscall got me nothing. | | | *) Without 1028ccf5 | root@localhost:~# perf list|grep -i syscall |syscalls:sys_enter_socket [Tracepoint event] |syscalls:sys_exit_socket [Tracepoint event] |syscalls:sys_enter_socketpair [Tracepoint event] |syscalls:sys_exit_socketpair [Tracepoint event] |syscalls:sys_enter_bind[Tracepoint event] |syscalls:sys_exit_bind [Tracepoint event] |syscalls:sys_enter_listen [Tracepoint event] |syscalls:sys_exit_listen [Tracepoint event] |... ... Are you seeing this on big-endian or little-endian system? IIRC, I saw the opposite behavior on an LE system a few months ago. i.e. without 1028ccf5, 'perf listf|grep syscall' failed. Applying 1028ccf5, seemed to fix it. Sukadev ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH v3 5/8] perf: Split perf_event_read_value()
Peter Zijlstra [pet...@infradead.org] wrote: | On Tue, Jul 14, 2015 at 08:01:52PM -0700, Sukadev Bhattiprolu wrote: | Move the part of perf_event_read_value() that computes the event | counts and event times into a new function, perf_event_compute(). | | This would allow us to call perf_event_compute() independently. | | Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com | | Changelog[v3] | Rather than move perf_event_read() into callers and then | rename, just move the computations into a separate function | (redesign to address comment from Peter Zijlstra). | --- | | Changelog[] bits go here, below the '---' where they get discarded. Sorry. Will fix it. | | kernel/events/core.c | 37 - | 1 file changed, 24 insertions(+), 13 deletions(-) | | diff --git a/kernel/events/core.c b/kernel/events/core.c | index 44fb89d..b1e9a42 100644 | --- a/kernel/events/core.c | +++ b/kernel/events/core.c | @@ -3704,6 +3704,29 @@ static int perf_release(struct inode *inode, struct file *file) | return 0; | } | | +static u64 perf_event_compute(struct perf_event *event, u64 *enabled, | + u64 *running) | | This is a horrible name, 'compute' what? We are aggregating event counts and time for children. Would perf_event_aggregate() or perf_event_aggregate_children() be better? | | +{ | + struct perf_event *child; | + u64 total; | + | + total = perf_event_count(event); | + | + *enabled += event-total_time_enabled + | + atomic64_read(event-child_total_time_enabled); | + *running += event-total_time_running + | + atomic64_read(event-child_total_time_running); | + | | lockdep_assert_held(event-child_mutex); OK. Thanks for the comments. Sukadev ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 0/8] Implement group-read of events using txn interface
Unlike normal hardware PMCs, the 24x7 counters in Power8 are stored in memory and accessed via a hypervisor call (HCALL). A major aspect of the HCALL is that it allows retrieving _several_ counters at once (unlike regular PMCs, which are read one at a time). By reading several counters at once, we can get a more consistent snapshot of the system. This patchset extends the transaction interface to accomplish submitting several events to the PMU and have the PMU read them all at once. User is expected to submit the set of events they want to read as an event group. In the kernel, we submit each event to the PMU using the following logic (from Peter Zijlstra). pmu->start_txn(pmu, PMU_TXN_READ); leader->read(); for_each_sibling() sibling->read(); pmu->commit_txn(); where: - the ->read()s queue events to be submitted to the hypervisor, and, - the ->commit_txn() issues the HCALL, retrieves the result and updates the event count. Architectures/PMUs that don't need/implement PMU_TXN_READ type of transactions, simply ignore the ->start_txn() and ->commit_txn() and continue to read the counters one at a time in the ->read() call. Compile/touch tested on x86. Need help testing on s390 and Sparc. Thanks to Peter Zijlstra for his input. Changelog [v3] - Simple changes/reorg of patchset to split/rename functions - [Peter Zijlstra] Save the transaction flags in ->start_txn() and drop the flags parameter from ->commit_txn() and ->cancel_txn(). - [Peter Zijlstra] The nop txn interfaces don't need to disable/enable PMU for PERF_PMU_TXN_READ transactions. Changelog [v2] - Use the transaction interface unconditionally to avoid special-case code. Architectures/PMUs that don't need the READ transaction types simply ignore the ->start_txn() and ->commit_txn() calls. 
Peter Zijlstra (Intel) (1): perf: Rename perf_event_read_{one,group}, perf_read_hw Sukadev Bhattiprolu (7): powerpc/perf/hv-24x7: Whitespace - fix parameter alignment powerpc/perf/hv-24x7: Simplify extracting counter from result buffer perf: Add a flags parameter to pmu txn interfaces perf: Split perf_event_read() and perf_event_count() perf: Split perf_event_read_value() perf: Define PMU_TXN_READ interface powerpc/perf/hv-24x7: Use PMU_TXN_READ interface arch/powerpc/perf/core-book3s.c | 25 - arch/powerpc/perf/hv-24x7.c | 186 ++ arch/s390/kernel/perf_cpum_cf.c | 24 - arch/sparc/kernel/perf_event.c | 19 +++- arch/x86/kernel/cpu/perf_event.c | 27 +- arch/x86/kernel/cpu/perf_event.h |1 + include/linux/perf_event.h | 15 ++- kernel/events/core.c | 143 +++-- 8 files changed, 389 insertions(+), 51 deletions(-) -- 1.7.9.5 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 5/8] perf: Split perf_event_read_value()
Move the part of perf_event_read_value() that computes the event counts and event times into a new function, perf_event_compute(). This would allow us to call perf_event_compute() independently. Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com Changelog[v3] Rather than move perf_event_read() into callers and then rename, just move the computations into a separate function (redesign to address comment from Peter Zijlstra). --- kernel/events/core.c | 37 - 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 44fb89d..b1e9a42 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3704,6 +3704,29 @@ static int perf_release(struct inode *inode, struct file *file) return 0; } +static u64 perf_event_compute(struct perf_event *event, u64 *enabled, + u64 *running) +{ + struct perf_event *child; + u64 total; + + total = perf_event_count(event); + + *enabled += event-total_time_enabled + + atomic64_read(event-child_total_time_enabled); + *running += event-total_time_running + + atomic64_read(event-child_total_time_running); + + list_for_each_entry(child, event-child_list, child_list) { + perf_event_read(child); + total += perf_event_count(child); + *enabled += child-total_time_enabled; + *running += child-total_time_running; + } + + return total; +} + /* * Remove all orphanes events from the context. 
*/ @@ -3742,7 +3765,6 @@ static void orphans_remove_work(struct work_struct *work) u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) { - struct perf_event *child; u64 total = 0; *enabled = 0; @@ -3751,19 +3773,8 @@ u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) mutex_lock(event-child_mutex); perf_event_read(event); - total += perf_event_count(event); + total = perf_event_compute(event, enabled, running); - *enabled += event-total_time_enabled + - atomic64_read(event-child_total_time_enabled); - *running += event-total_time_running + - atomic64_read(event-child_total_time_running); - - list_for_each_entry(child, event-child_list, child_list) { - perf_event_read(child); - total += perf_event_count(child); - *enabled += child-total_time_enabled; - *running += child-total_time_running; - } mutex_unlock(event-child_mutex); return total; -- 1.7.9.5 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 7/8] perf: Define PMU_TXN_READ interface
Define a new PERF_PMU_TXN_READ interface to read a group of counters at once. Note that we use this interface with all PMUs. PMUs that implement this interface use the -read() operation to _queue_ the counters to be read and use -commit_txn() to actually read all the queued counters at once. PMUs that don't implement PERF_PMU_TXN_READ ignore -start_txn() and -commit_txn() and continue to read counters one at a time. Thanks to input from Peter Zijlstra. Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com --- include/linux/perf_event.h |1 + kernel/events/core.c | 35 +-- 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 44bf05f..da307ad 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -169,6 +169,7 @@ struct perf_event; #define PERF_EVENT_TXN 0x1 #define PERF_PMU_TXN_ADD 0x1 /* txn to add/schedule event on PMU */ +#define PERF_PMU_TXN_READ 0x2 /* txn to read event group from PMU */ /** * pmu::capabilities flags diff --git a/kernel/events/core.c b/kernel/events/core.c index a83d45c..2ea06c4 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3763,6 +3763,33 @@ static void orphans_remove_work(struct work_struct *work) put_ctx(ctx); } +/* + * Use the transaction interface to read the group of events in @leader. + * PMUs like the 24x7 counters in Power, can use this to queue the events + * in the -read() operation and perform the actual read in -commit_txn. + * + * Other PMUs can ignore the -start_txn and -commit_txn and read each + * PMU directly in the -read() operation. 
+ */ +static int perf_event_read_group(struct perf_event *leader) +{ + int ret; + struct perf_event *sub; + struct pmu *pmu; + + pmu = leader-pmu; + + pmu-start_txn(pmu, PERF_PMU_TXN_READ); + + perf_event_read(leader); + list_for_each_entry(sub, leader-sibling_list, group_entry) + perf_event_read(sub); + + ret = pmu-commit_txn(pmu); + + return ret; +} + u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) { u64 total = 0; @@ -3792,7 +3819,11 @@ static int perf_read_group(struct perf_event *event, lockdep_assert_held(ctx-mutex); - count = perf_event_read_value(leader, enabled, running); + ret = perf_event_read_group(leader); + if (ret) + return ret; + + count = perf_event_compute(leader, enabled, running); values[n++] = 1 + leader-nr_siblings; if (read_format PERF_FORMAT_TOTAL_TIME_ENABLED) @@ -3813,7 +3844,7 @@ static int perf_read_group(struct perf_event *event, list_for_each_entry(sub, leader-sibling_list, group_entry) { n = 0; - values[n++] = perf_event_read_value(sub, enabled, running); + values[n++] = perf_event_compute(sub, enabled, running); if (read_format PERF_FORMAT_ID) values[n++] = primary_event_id(sub); -- 1.7.9.5 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 2/8] powerpc/perf/hv-24x7: Simplify extracting counter from result buffer
Simplify code that extracts a 24x7 counter from the HCALL's result buffer. Suggested-by: Joe Perches j...@perches.com Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com --- arch/powerpc/perf/hv-24x7.c |6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index 9d73c69..4d1a8d1 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -1104,7 +1104,7 @@ static unsigned long single_24x7_request(struct perf_event *event, u64 *count) unsigned long ret; struct hv_24x7_request_buffer *request_buffer; struct hv_24x7_data_result_buffer *result_buffer; - struct hv_24x7_result *resb; + __be64 val; BUILD_BUG_ON(sizeof(*request_buffer) > 4096); BUILD_BUG_ON(sizeof(*result_buffer) > 4096); @@ -1125,8 +1125,8 @@ static unsigned long single_24x7_request(struct perf_event *event, u64 *count) } /* process result from hcall */ - resb = &result_buffer->results[0]; - *count = be64_to_cpu(resb->elements[0].element_data[0]); + val = result_buffer->results[0].elements[0].element_data[0]; + *count = be64_to_cpu(val); out: put_cpu_var(hv_24x7_reqb); -- 1.7.9.5 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 1/8] powerpc/perf/hv-24x7: Whitespace - fix parameter alignment
Fix parameter alignment to be consistent with coding style. Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com --- arch/powerpc/perf/hv-24x7.c | 20 ++-- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index df95629..9d73c69 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -416,7 +416,7 @@ out_val: } static struct attribute *event_to_desc_attr(struct hv_24x7_event_data *event, - int nonce) + int nonce) { int nl, dl; char *name = event_name(event, nl); @@ -444,7 +444,7 @@ event_to_long_desc_attr(struct hv_24x7_event_data *event, int nonce) } static ssize_t event_data_to_attrs(unsigned ix, struct attribute **attrs, - struct hv_24x7_event_data *event, int nonce) + struct hv_24x7_event_data *event, int nonce) { unsigned i; @@ -512,7 +512,7 @@ static int memord(const void *d1, size_t s1, const void *d2, size_t s2) } static int ev_uniq_ord(const void *v1, size_t s1, unsigned d1, const void *v2, - size_t s2, unsigned d2) + size_t s2, unsigned d2) { int r = memord(v1, s1, v2, s2); @@ -526,7 +526,7 @@ static int ev_uniq_ord(const void *v1, size_t s1, unsigned d1, const void *v2, } static int event_uniq_add(struct rb_root *root, const char *name, int nl, - unsigned domain) + unsigned domain) { struct rb_node **new = (root-rb_node), *parent = NULL; struct event_uniq *data; @@ -650,8 +650,8 @@ static ssize_t catalog_event_len_validate(struct hv_24x7_event_data *event, #define MAX_4K (SIZE_MAX / 4096) static int create_events_from_catalog(struct attribute ***events_, - struct attribute ***event_descs_, - struct attribute ***event_long_descs_) + struct attribute ***event_descs_, + struct attribute ***event_long_descs_) { unsigned long hret; size_t catalog_len, catalog_page_len, event_entry_count, @@ -1008,8 +1008,8 @@ static const struct attribute_group *attr_groups[] = { }; static void log_24x7_hcall(struct hv_24x7_request_buffer *request_buffer, - struct 
hv_24x7_data_result_buffer *result_buffer, - unsigned long ret) + struct hv_24x7_data_result_buffer *result_buffer, + unsigned long ret) { struct hv_24x7_request *req; @@ -1026,7 +1026,7 @@ static void log_24x7_hcall(struct hv_24x7_request_buffer *request_buffer, * Start the process for a new H_GET_24x7_DATA hcall. */ static void init_24x7_request(struct hv_24x7_request_buffer *request_buffer, - struct hv_24x7_data_result_buffer *result_buffer) + struct hv_24x7_data_result_buffer *result_buffer) { memset(request_buffer, 0, 4096); @@ -1041,7 +1041,7 @@ static void init_24x7_request(struct hv_24x7_request_buffer *request_buffer, * by 'init_24x7_request()' and 'add_event_to_24x7_request()'. */ static int make_24x7_request(struct hv_24x7_request_buffer *request_buffer, - struct hv_24x7_data_result_buffer *result_buffer) +struct hv_24x7_data_result_buffer *result_buffer) { unsigned long ret; -- 1.7.9.5 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 6/8] perf: Rename perf_event_read_{one, group}, perf_read_hw
From: Peter Zijlstra (Intel) pet...@infradead.org In order to free up the perf_event_read_group() name: s/perf_event_read_\(one\|group\)/perf_read_\1/g s/perf_read_hw/__perf_read/g Signed-off-by: Peter Zijlstra (Intel) pet...@infradead.org --- kernel/events/core.c | 14 +++--- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index b1e9a42..a83d45c 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3675,7 +3675,7 @@ static void put_event(struct perf_event *event) * see the comment there. * * 2) there is a lock-inversion with mmap_sem through -* perf_event_read_group(), which takes faults while +* perf_read_group(), which takes faults while * holding ctx-mutex, however this is called after * the last filedesc died, so there is no possibility * to trigger the AB-BA case. @@ -3781,7 +3781,7 @@ u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) } EXPORT_SYMBOL_GPL(perf_event_read_value); -static int perf_event_read_group(struct perf_event *event, +static int perf_read_group(struct perf_event *event, u64 read_format, char __user *buf) { struct perf_event *leader = event-group_leader, *sub; @@ -3829,7 +3829,7 @@ static int perf_event_read_group(struct perf_event *event, return ret; } -static int perf_event_read_one(struct perf_event *event, +static int perf_read_one(struct perf_event *event, u64 read_format, char __user *buf) { u64 enabled, running; @@ -3867,7 +3867,7 @@ static bool is_event_hup(struct perf_event *event) * Read the performance event - simple non blocking version for now */ static ssize_t -perf_read_hw(struct perf_event *event, char __user *buf, size_t count) +__perf_read(struct perf_event *event, char __user *buf, size_t count) { u64 read_format = event-attr.read_format; int ret; @@ -3885,9 +3885,9 @@ perf_read_hw(struct perf_event *event, char __user *buf, size_t count) WARN_ON_ONCE(event-ctx-parent_ctx); if (read_format PERF_FORMAT_GROUP) - ret = 
perf_event_read_group(event, read_format, buf); + ret = perf_read_group(event, read_format, buf); else - ret = perf_event_read_one(event, read_format, buf); + ret = perf_read_one(event, read_format, buf); return ret; } @@ -3900,7 +3900,7 @@ perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) int ret; ctx = perf_event_ctx_lock(event); - ret = perf_read_hw(event, buf, count); + ret = __perf_read(event, buf, count); perf_event_ctx_unlock(event, ctx); return ret; -- 1.7.9.5 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 4/8] perf: Split perf_event_read() and perf_event_count()
perf_event_read() does two things: - call the PMU to read/update the counter value, and - compute the total count of the event and its children Not all callers need both. perf_event_reset() for instance needs the first piece but doesn't need the second. Similarly, when we implement the ability to read a group of events using the transaction interface, we would need the two pieces done independently. Break up perf_event_read() and have it just read/update the counter and have the callers compute the total count if necessary. Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com --- kernel/events/core.c | 14 -- 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index b79aad2..44fb89d 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3212,7 +3212,7 @@ static inline u64 perf_event_count(struct perf_event *event) return __perf_event_count(event); } -static u64 perf_event_read(struct perf_event *event) +static void perf_event_read(struct perf_event *event) { /* * If event is enabled and currently active on a CPU, update the @@ -3238,8 +3238,6 @@ static u64 perf_event_read(struct perf_event *event) update_event_times(event); raw_spin_unlock_irqrestore(ctx-lock, flags); } - - return perf_event_count(event); } /* @@ -3751,14 +3749,18 @@ u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) *running = 0; mutex_lock(event-child_mutex); - total += perf_event_read(event); + + perf_event_read(event); + total += perf_event_count(event); + *enabled += event-total_time_enabled + atomic64_read(event-child_total_time_enabled); *running += event-total_time_running + atomic64_read(event-child_total_time_running); list_for_each_entry(child, event-child_list, child_list) { - total += perf_event_read(child); + perf_event_read(child); + total += perf_event_count(child); *enabled += child-total_time_enabled; *running += child-total_time_running; } @@ -3918,7 +3920,7 @@ static unsigned int 
perf_poll(struct file *file, poll_table *wait) static void _perf_event_reset(struct perf_event *event) { - (void)perf_event_read(event); + perf_event_read(event); local64_set(event-count, 0); perf_event_update_userpage(event); } -- 1.7.9.5 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 3/8] perf: Add a flags parameter to pmu txn interfaces
Currently, the PMU interface allows reading only one counter at a time. But some PMUs like the 24x7 counters in Power, support reading several counters at once. To leverage this functionality, extend the transaction interface to support a transaction type. The first type, PERF_PMU_TXN_ADD, refers to the existing transactions, i.e. used to _schedule_ all the events on the PMU as a group. A second transaction type, PERF_PMU_TXN_READ, will be used in a follow-on patch, by the 24x7 counters to read several counters at once. Extend the transaction interfaces to the PMU to accept a 'txn_flags' parameter and use this parameter to ignore any transactions that are not of type PERF_PMU_TXN_ADD. Thanks to Peter Zijlstra for his input. Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com Changelog[v3] - [Peter Zijlstra] Ensure the nop_txn interfaces disable/enable PMU only for TXN_ADD transactions. - [Peter Zijlstra] Cache the flags parameter in ->start_txn() and drop the flags parameter from ->commit_txn() and ->cancel_txn(). 
--- arch/powerpc/perf/core-book3s.c | 25 ++- arch/s390/kernel/perf_cpum_cf.c | 24 +- arch/sparc/kernel/perf_event.c | 19 +- arch/x86/kernel/cpu/perf_event.c | 27 ++-- arch/x86/kernel/cpu/perf_event.h |1 + include/linux/perf_event.h | 14 --- kernel/events/core.c | 51 +++--- 7 files changed, 150 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index d90893b..b92084b 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -50,6 +50,7 @@ struct cpu_hw_events { unsigned int group_flag; int n_txn_start; + int txn_flags; /* BHRB bits */ u64 bhrb_filter;/* BHRB HW branch filter */ @@ -1586,11 +1587,19 @@ static void power_pmu_stop(struct perf_event *event, int ef_flags) * Start group events scheduling transaction * Set the flag to make pmu::enable() not perform the * schedulability test, it will be performed at commit time + * + * We only support PERF_PMU_TXN_ADD transactions. Save the + * transaction flags but otherwise ignore non-PERF_PMU_TXN_ADD + * transactions. 
*/ -static void power_pmu_start_txn(struct pmu *pmu) +static void power_pmu_start_txn(struct pmu *pmu, int txn_flags) { struct cpu_hw_events *cpuhw = this_cpu_ptr(cpu_hw_events); + cpuhw-txn_flags = txn_flags; + if (txn_flags ~PERF_PMU_TXN_ADD) + return; + perf_pmu_disable(pmu); cpuhw-group_flag |= PERF_EVENT_TXN; cpuhw-n_txn_start = cpuhw-n_events; @@ -1604,6 +1613,12 @@ static void power_pmu_start_txn(struct pmu *pmu) static void power_pmu_cancel_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw = this_cpu_ptr(cpu_hw_events); + int txn_flags; + + txn_flags = cpuhw-txn_flags; + cpuhw-txn_flags = 0; + if (cpuhw-txn_flags ~PERF_PMU_TXN_ADD) + return; cpuhw-group_flag = ~PERF_EVENT_TXN; perf_pmu_enable(pmu); @@ -1618,10 +1633,18 @@ static int power_pmu_commit_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw; long i, n; + int txn_flags; if (!ppmu) return -EAGAIN; + cpuhw = this_cpu_ptr(cpu_hw_events); + + txn_flags = cpuhw-txn_flags; + cpuhw-txn_flags = 0; + if (cpuhw-txn_flags ~PERF_PMU_TXN_ADD) + return 0; + n = cpuhw-n_events; if (check_excludes(cpuhw-event, cpuhw-flags, 0, n)) return -EAGAIN; diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 56fdad4..a6f9e7b 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -72,6 +72,7 @@ struct cpu_hw_events { atomic_tctr_set[CPUMF_CTR_SET_MAX]; u64 state, tx_state; unsigned intflags; + int txn_flags; }; static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .ctr_set = { @@ -82,6 +83,7 @@ static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { }, .state = 0, .flags = 0, + .txn_flags = 0, }; static int get_counter_set(u64 event) @@ -572,11 +574,19 @@ static void cpumf_pmu_del(struct perf_event *event, int flags) /* * Start group events scheduling transaction. * Set flags to perform a single test at commit time. + * + * We only support PERF_PMU_TXN_ADD transactions. 
Save the + * transaction flags but otherwise ignore non-PERF_PMU_TXN_ADD + * transactions. */ -static void cpumf_pmu_start_txn(struct pmu *pmu) +static void cpumf_pmu_start_txn(struct pmu *pmu, int txn_flags) { struct cpu_hw_events *cpuhw = this_cpu_ptr(cpu_hw_events); + cpuhw-txn_flags
[PATCH v3 8/8] powerpc/perf/hv-24x7: Use PMU_TXN_READ interface
The 24x7 counters in Powerpc allow monitoring a large number of counters simultaneously. They also allow reading several counters in a single HCALL so we can get a more consistent snapshot of the system. Use the PMU's transaction interface to monitor and read several event counters at once. The idea is that users can group several 24x7 events into a single group of events. We use the following logic to submit the group of events to the PMU and read the values: pmu-start_txn()// Initialize before first event for each event in group pmu-read(event); // Queue each event to be read pmu-commit_txn() // Read/update all queuedcounters The -commit_txn() also updates the event counts in the respective perf_event objects. The perf subsystem can then directly get the event counts from the perf_event and can avoid submitting a new -read() request to the PMU. Thanks to input from Peter Zijlstra. Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com Changelog[v3] [Peter Zijlstra] Save the transaction state in -start_txn() and drop the flags parameter from -commit_txn() and -cancel_txn(). [Peter Zijlstra] The nop txn interfaces don't need to disable/enable PMU for PERF_PMU_TXN_READ transactions. --- arch/powerpc/perf/hv-24x7.c | 160 ++- 1 file changed, 157 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index 4d1a8d1..c28ef3f 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -142,8 +142,24 @@ static struct attribute_group event_long_desc_group = { static struct kmem_cache *hv_page_cache; +struct h_24x7_hw { + int txn_err; + int txn_flags; + struct perf_event *events[255]; +} h24x7hw; + /* - * request_buffer and result_buffer are not required to be 4k aligned, + * The request and result buffers are also used in interrupt context + * (eg: we read/update the event counts in h_24x7_event_stop()). 
Rather + * than allocating buffers in interrupt context (i.e before each HCALL), + * pre-allocate per-CPU request and result buffers. + * + * However, for the transaction interface, the -start_txn(), where the + * buffers are initialized and the -read() operations (where the buffers + * are used) are not guaranteed to be on the same CPU. Hence, we cannot + * use the per-CPU buffers. Use PMU-wide request and result buffers instead. + * + * Note that request and result buffers are not required to be 4k aligned, * but are not allowed to cross any 4k boundary. Aligning them to 4k is * the simplest way to ensure that. */ @@ -151,6 +167,9 @@ static struct kmem_cache *hv_page_cache; DEFINE_PER_CPU(char, hv_24x7_reqb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096); DEFINE_PER_CPU(char, hv_24x7_resb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096); +char hv_24x7_txn_reqb[H24x7_DATA_BUFFER_SIZE] __aligned(4096); +char hv_24x7_txn_resb[H24x7_DATA_BUFFER_SIZE] __aligned(4096); + static char *event_name(struct hv_24x7_event_data *ev, int *len) { *len = be16_to_cpu(ev-event_name_len) - 2; @@ -1233,9 +1252,42 @@ static void update_event_count(struct perf_event *event, u64 now) static void h_24x7_event_read(struct perf_event *event) { u64 now; + struct hv_24x7_request_buffer *request_buffer; + + /* +* If in a READ transaction, add this counter to the list of +* counters to read during the next HCALL (i.e commit_txn()). +* If not in a READ transaction, go ahead and make the HCALL +* to read this counter by itself. +*/ + + if (h24x7hw.txn_flags PERF_PMU_TXN_READ) { + int i; + int ret; + + if (h24x7hw.txn_err) + return; + + request_buffer = (void *)hv_24x7_txn_reqb[0]; + + ret = add_event_to_24x7_request(event, request_buffer); + if (ret) { + h24x7hw.txn_err = ret; + } else { + /* +* Assoicate the event with the HCALL request index, +* so -commit_txn() can quickly find/update count. 
+*/ + i = request_buffer-num_requests - 1; + h24x7hw.events[i] = event; + } + + put_cpu_var(hv_24x7_reqb); + } else { + now = h_24x7_get_value(event); + update_event_count(event, now); + } - now = h_24x7_get_value(event); - update_event_count(event, now); } static void h_24x7_event_start(struct perf_event *event, int flags) @@ -1257,6 +1309,105 @@ static int h_24x7_event_add(struct perf_event *event, int flags) return 0; } +/* + * 24x7 counters only support READ transactions. They are + * always counting and dont need/support ADD transactions. + * Cache
Re: [PATCH v4 4/7] powerpc/powernv: detect supported nest pmus and its events
Sukadev Bhattiprolu [suka...@linux.vnet.ibm.com] wrote: | | @@ -50,6 +163,15 @@ static int nest_ima_dt_parser(void) | | p8ni->vbase = (uint64_t) phys_to_virt(p8ni->pbase); | | } | | | | + /* Look for supported Nest PMU units */ | | + idx = 0; | | + for_each_node_by_type(dev, "nest-ima-unit") { | | + ret = nest_pmu_create(dev, idx); | | + if (ret) | | + return ret; | | + idx++; | | idx not used? Sorry, disregard this. Had my blinders on :-( ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH v4 4/7] powerpc/powernv: detect supported nest pmus and its events
Madhavan Srinivasan [ma...@linux.vnet.ibm.com] wrote: | | Are the 'start.*' and 'unit.*' files events by themselves or just attributes | of events? | | These are attributes needed for computation. unit and scale attributes | will be used by perf tool in post-processing the counter data. These | can also be used by other tools like pcp. OK. Thanks for clarifying. ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH v4 4/7] powerpc/powernv: detect supported nest pmus and its events
Madhavan Srinivasan [ma...@linux.vnet.ibm.com] wrote: | Parse device tree to detect supported nest pmu units. Traverse | through each nest pmu unit folder to find supported events and | corresponding unit/scale files (if any). | | The nest unit event file from DT, will contain the offset in the | reserved memory region to get the counter data for a given event. | Kernel code uses this offset as event configuration value. | | Device tree parser code also looks for scale/unit in the file name and | passes on the file as an event attr for perf tool to use in the post | processing. | | Cc: Michael Ellerman m...@ellerman.id.au | Cc: Benjamin Herrenschmidt b...@kernel.crashing.org | Cc: Paul Mackerras pau...@samba.org | Cc: Anton Blanchard an...@samba.org | Cc: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com | Cc: Anshuman Khandual khand...@linux.vnet.ibm.com | Cc: Stephane Eranian eran...@google.com | Signed-off-by: Madhavan Srinivasan ma...@linux.vnet.ibm.com | --- | arch/powerpc/perf/nest-pmu.c | 124 ++- | 1 file changed, 123 insertions(+), 1 deletion(-) | | diff --git a/arch/powerpc/perf/nest-pmu.c b/arch/powerpc/perf/nest-pmu.c | index e7d45ed..6116ff3 100644 | --- a/arch/powerpc/perf/nest-pmu.c | +++ b/arch/powerpc/perf/nest-pmu.c | @@ -11,6 +11,119 @@ | #include nest-pmu.h | | static struct perchip_nest_info p8_nest_perchip_info[P8_NEST_MAX_CHIPS]; | +static struct nest_pmu *per_nest_pmu_arr[P8_NEST_MAX_PMUS]; | + | +static int nest_event_info(struct property *pp, char *start, nit: s/start/name/? | + struct nest_ima_events *p8_events, int flg, u32 val) nit: s/flg/string/? 
| +{ | + char *buf; | + | + /* memory for event name */ | + buf = kzalloc(P8_NEST_MAX_PMU_NAME_LEN, GFP_KERNEL); | + if (!buf) | + return -ENOMEM; | + | + strncpy(buf, start, strlen(start)); | + p8_events-ev_name = buf; | + | + /* memory for content */ | + buf = kzalloc(P8_NEST_MAX_PMU_NAME_LEN, GFP_KERNEL); | + if (!buf) | + return -ENOMEM; | + | + if (flg) { | + /* string content*/ | + if (!pp-value || | +(strnlen(pp-value, pp-length) == pp-length)) | + return -EINVAL; | + | + strncpy(buf, (const char *)pp-value, pp-length); | + } else | + sprintf(buf, event=0x%x, val); | + | + p8_events-ev_value = buf; | + return 0; | +} | + | +static int nest_pmu_create(struct device_node *dev, int pmu_index) | +{ | + struct nest_ima_events **p8_events_arr, *p8_events; | + struct nest_pmu *pmu_ptr; | + struct property *pp; | + char *buf, *start; | + const __be32 *lval; | + u32 val; | + int idx = 0, ret; | + | + if (!dev) | + return -EINVAL; | + | + /* memory for nest pmus */ | + pmu_ptr = kzalloc(sizeof(struct nest_pmu), GFP_KERNEL); | + if (!pmu_ptr) | + return -ENOMEM; | + | + /* Needed for hotplug/migration */ | + per_nest_pmu_arr[pmu_index] = pmu_ptr; | + | + /* memory for nest pmu events */ | + p8_events_arr = kzalloc((sizeof(struct nest_ima_events) * 64), | + GFP_KERNEL); | + if (!p8_events_arr) | + return -ENOMEM; | + p8_events = (struct nest_ima_events *)p8_events_arr; | + | + /* | + * Loop through each property | + */ | + for_each_property_of_node(dev, pp) { | + start = pp-name; | + | + if (!strcmp(pp-name, name)) { | + if (!pp-value || | +(strnlen(pp-value, pp-length) == pp-length)) | + return -EINVAL; Do we need to check the string length here? If so, should we check against size we are going to allocate below (P8_NEST_MAX_PMU_NAME_LEN)? Or is it possible pp-value is not NULL terminated? 
| + | + buf = kzalloc(P8_NEST_MAX_PMU_NAME_LEN, GFP_KERNEL); | + if (!buf) | + return -ENOMEM; | + | + /* Save the name to register it later */ | + sprintf(buf, Nest_%s, (char *)pp-value); | + pmu_ptr-pmu.name = (char *)buf; | + continue; | + } | + | + /* Skip these, we dont need it */ | + if (!strcmp(pp-name, phandle) || | + !strcmp(pp-name, device_type) || | + !strcmp(pp-name, linux,phandle)) | + continue; | + | + if (strncmp(pp-name, unit., 5) == 0) { | + /* Skip first few chars in the name */ | + start += 5; | + ret = nest_event_info(pp, start, p8_events++, 1, 0); | + } else if (strncmp(pp-name, scale., 6) == 0) { | + /* Skip first few chars in the name */ | + start += 6
Re: [PATCH v4 5/7] powerpc/powernv: add event attribute and group to nest pmu
Madhavan Srinivasan [ma...@linux.vnet.ibm.com] wrote: | Add code to create event/format attributes and attribute groups for | each nest pmu. | | Cc: Michael Ellerman m...@ellerman.id.au | Cc: Benjamin Herrenschmidt b...@kernel.crashing.org | Cc: Paul Mackerras pau...@samba.org | Cc: Anton Blanchard an...@samba.org | Cc: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com | Cc: Anshuman Khandual khand...@linux.vnet.ibm.com | Cc: Stephane Eranian eran...@google.com | Signed-off-by: Madhavan Srinivasan ma...@linux.vnet.ibm.com | --- | arch/powerpc/perf/nest-pmu.c | 57 | 1 file changed, 57 insertions(+) | | diff --git a/arch/powerpc/perf/nest-pmu.c b/arch/powerpc/perf/nest-pmu.c | index 6116ff3..20ed9f8 100644 | --- a/arch/powerpc/perf/nest-pmu.c | +++ b/arch/powerpc/perf/nest-pmu.c | @@ -13,6 +13,17 @@ | static struct perchip_nest_info p8_nest_perchip_info[P8_NEST_MAX_CHIPS]; | static struct nest_pmu *per_nest_pmu_arr[P8_NEST_MAX_PMUS]; | | +PMU_FORMAT_ATTR(event, config:0-20); | +struct attribute *p8_nest_format_attrs[] = { name collision unlikely, but could this be static struct? | + format_attr_event.attr, | + NULL, | +}; | + | +struct attribute_group p8_nest_format_group = { static struct? | + .name = format, | + .attrs = p8_nest_format_attrs, | +}; | + | static int nest_event_info(struct property *pp, char *start, | struct nest_ima_events *p8_events, int flg, u32 val) | { | @@ -45,6 +56,48 @@ static int nest_event_info(struct property *pp, char *start, | return 0; | } | | +/* | + * Populate event name and string in attribute | + */ | +struct attribute *dev_str_attr(const char *name, const char *str) static function? 
| +{ | + struct perf_pmu_events_attr *attr; | + | + attr = kzalloc(sizeof(*attr), GFP_KERNEL); | + We recently needed following in 24x7 counters to keep lockdep happy: sysfs_attr_init(attr-attr.attr); | + attr-event_str = str; | + attr-attr.attr.name = name; | + attr-attr.attr.mode = 0444; | + attr-attr.show = perf_event_sysfs_show; | + | + return attr-attr.attr; | +} | + | +int update_events_in_group( static function? nit: do we need a new line before the first parameter? some functions in the file don't add the new line. | + struct nest_ima_events *p8_events, int nevents, struct nest_pmu *pmu) s/idx/nevents/? | +{ | + struct attribute_group *attr_group; | + struct attribute **attrs; | + int i; | + | + /* Allocate memory for event attribute group */ | + attr_group = kzalloc(((sizeof(struct attribute *) * (idx + 1)) + | + sizeof(*attr_group)), GFP_KERNEL); | + if (!attr_group) | + return -ENOMEM; | + | + attrs = (struct attribute **)(attr_group + 1); Can you add a comment on the +1? | + attr_group-name = events; | + attr_group-attrs = attrs; | + | + for (i = 0; i idx; i++, p8_events++) | + attrs[i] = dev_str_attr((char *)p8_events-ev_name, | + (char *)p8_events-ev_value); | + | + pmu-attr_groups[0] = attr_group; The -attr_groups[0] is initialized here, after the -attr_groups[1] and attr_groups[2] are initialized in caller. Since, -attr_groups[1] and -attr_groups[2] are set to global (loop-invariant) values, can we initialize all the attribute-groups here? May need to rename function. 
| + return 0; | +} | + | static int nest_pmu_create(struct device_node *dev, int pmu_index) | { | struct nest_ima_events **p8_events_arr, *p8_events; | @@ -91,6 +144,7 @@ static int nest_pmu_create(struct device_node *dev, int pmu_index) | /* Save the name to register it later */ | sprintf(buf, Nest_%s, (char *)pp-value); | pmu_ptr-pmu.name = (char *)buf; | + pmu_ptr-attr_groups[1] = p8_nest_format_group; | continue; | } | | @@ -122,6 +176,9 @@ static int nest_pmu_create(struct device_node *dev, int pmu_index) | idx++; | } | | + update_events_in_group( nit: need newline before first param? | + (struct nest_ima_events *)p8_events_arr, idx, pmu_ptr); | + | return 0; | } | | -- | 1.9.1 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH] powerpc/perf/24x7: Fix lockdep warning
From 370152d9427e57cd9632b00189f71099f8e85544 Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com Date: Tue, 7 Jul 2015 12:21:10 -0400 Subject: [PATCH 1/1] powerpc/perf/24x7: Fix lockdep warning The sysfs attributes for the 24x7 counters are dynamically allocated. Initialize the attributes using sysfs_attr_init() to fix following warning which occurs when CONFIG_DEBUG_LOCK_VMALLOC=y. [0.346249] audit: initializing netlink subsys (disabled) [0.346284] audit: type=2000 audit(1436295254.340:1): initialized [0.346489] BUG: key c000efe90198 not in .data! [0.346491] DEBUG_LOCKS_WARN_ON(1) [0.346502] [ cut here ] [0.346504] WARNING: at ../kernel/locking/lockdep.c:3002 [0.346506] Modules linked in: Reported-by: Gustavo Luiz Duarte gustav...@linux.vnet.ibm.com Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com --- arch/powerpc/perf/hv-24x7.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index ec2eb20..df95629 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -320,6 +320,8 @@ static struct attribute *device_str_attr_create_(char *name, char *str) if (!attr) return NULL; + sysfs_attr_init(attr-attr.attr); + attr-var = str; attr-attr.attr.name = name; attr-attr.attr.mode = 0444; -- 1.8.3.1 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH v2 6/7]powerpc/powernv: generic nest pmu event functions
Madhavan Srinivasan [ma...@linux.vnet.ibm.com] wrote: | From: Madhavan Srinivasan ma...@linux.vnet.ibm.com | Subject: [PATCH v2 6/7]powerpc/powernv: generic nest pmu event functions | | Add generic format attribute and set of generic nest pmu related | event functions to be used by each nest pmu. Add code to register nest pmus. | | Cc: Michael Ellerman m...@ellerman.id.au | Cc: Benjamin Herrenschmidt b...@kernel.crashing.org | Cc: Paul Mackerras pau...@samba.org | Cc: Anton Blanchard an...@samba.org | Cc: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com | Cc: Anshuman Khandual khand...@linux.vnet.ibm.com | Cc: Stephane Eranian eran...@google.com | Signed-off-by: Madhavan Srinivasan ma...@linux.vnet.ibm.com | --- | arch/powerpc/perf/nest-pmu.c | 109 +++ | 1 file changed, 109 insertions(+) | | diff --git a/arch/powerpc/perf/nest-pmu.c b/arch/powerpc/perf/nest-pmu.c | index 8fad2d9..a662c14 100644 | --- a/arch/powerpc/perf/nest-pmu.c | +++ b/arch/powerpc/perf/nest-pmu.c | @@ -13,6 +13,108 @@ | static struct perchip_nest_info p8_perchip_nest_info[P8_MAX_CHIP]; | static struct nest_pmu *per_nest_pmu_arr[P8_MAX_NEST_PMUS]; | | +PMU_FORMAT_ATTR(event, config:0-20); | +struct attribute *p8_nest_format_attrs[] = { | + format_attr_event.attr, | + NULL, | +}; | + | +struct attribute_group p8_nest_format_group = { | + .name = format, | + .attrs = p8_nest_format_attrs, | +}; Could this be included in previous/separate patch? That way, this patch could focus on just registering the nest-pmu. 
| + | +static int p8_nest_event_init(struct perf_event *event) | +{ | + int chip_id; | + | + if (event-attr.type != event-pmu-type) | + return -ENOENT; | + | + /* Sampling not supported yet */ | + if (event-hw.sample_period) | + return -EINVAL; | + | + /* unsupported modes and filters */ | + if (event-attr.exclude_user || | + event-attr.exclude_kernel || | + event-attr.exclude_hv || | + event-attr.exclude_idle || | + event-attr.exclude_host || | + event-attr.exclude_guest) | + return -EINVAL; | + | + if (event-cpu 0) | + return -EINVAL; | + | + chip_id = topology_physical_package_id(event-cpu); | + event-hw.event_base = event-attr.config + | + p8_perchip_nest_info[chip_id].vbase; | + | + return 0; | +} | + | +static void p8_nest_read_counter(struct perf_event *event) | +{ | + u64 *addr; | Define as uint64_t so we can eliminate one cast below? Would also be consistent with p8_nest_perf_event_update(). | | + u64 data = 0; | + | + addr = (u64 *)event-hw.event_base; | + data = __be64_to_cpu((uint64_t)*addr); | + local64_set(event-hw.prev_count, data); | +} | + | +static void p8_nest_perf_event_update(struct perf_event *event) | +{ | + u64 counter_prev, counter_new, final_count; | + uint64_t *addr; | + | + addr = (u64 *)event-hw.event_base; uint64_t *? | + counter_prev = local64_read(event-hw.prev_count); | + counter_new = __be64_to_cpu((uint64_t)*addr); Redundant cast? addr is already uint64_t *? | + final_count = counter_new - counter_prev; | + | + local64_set(event-hw.prev_count, counter_new); | + local64_add(final_count, event-count); | +} | + | +static void p8_nest_event_start(struct perf_event *event, int flags) | +{ Check PERF_EF_RELOAD before reloading? | + event-hw.state = 0; | + p8_nest_read_counter(event); | +} | + | +static void p8_nest_event_stop(struct perf_event *event, int flags) | +{ Check PERF_EF_UPDATE when stopping? 
| + p8_nest_perf_event_update(event); | +} | + | +static int p8_nest_event_add(struct perf_event *event, int flags) | +{ Check PERF_EF_START flags before starting the counter on an -add()? | + p8_nest_event_start(event, flags); | + return 0; | +} | + | +/* | + * Populate pmu ops in the structure | + */ | +static int update_pmu_ops(struct nest_pmu *pmu) | +{ | + if (!pmu) | + return -EINVAL; | + | + pmu-pmu.task_ctx_nr = perf_invalid_context; | + pmu-pmu.event_init = p8_nest_event_init; | + pmu-pmu.add = p8_nest_event_add; | + pmu-pmu.del = p8_nest_event_stop; | + pmu-pmu.start = p8_nest_event_start; | + pmu-pmu.stop = p8_nest_event_stop; | + pmu-pmu.read = p8_nest_perf_event_update; | + pmu-pmu.attr_groups = pmu-attr_groups; | + | + return 0; | +} | + | /* | * Populate event name and string in attribute | */ | @@ -106,6 +208,7 @@ static int nest_pmu_create(struct device_node *dev, int pmu_index) | /* Save the name to register the PMU with it */ | sprintf(buf, Nest_%s, (char *)pp-value); | pmu_ptr-pmu.name = (char *)buf; | + pmu_ptr-attr_groups[1] = p8_nest_format_group; | } | | /* Skip these, we dont need it */ | @@ -179,6
[PATCH v15 08/19] perf, tools: Support CPU id matching for x86 v2
From: Andi Kleen a...@linux.intel.com Implement the code to match CPU types to mapfile types for x86 based on CPUID. This extends an existing similar function, but changes it to use the x86 mapfile cpu description. This allows to resolve event lists generated by jevents. Signed-off-by: Andi Kleen a...@linux.intel.com Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com Acked-by: Jiri Olsa jo...@redhat.com --- v2: Update to new get_cpuid_str() interface --- tools/perf/arch/x86/util/header.c | 24 +--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/tools/perf/arch/x86/util/header.c b/tools/perf/arch/x86/util/header.c index 146d12a..a74a48d 100644 --- a/tools/perf/arch/x86/util/header.c +++ b/tools/perf/arch/x86/util/header.c @@ -19,8 +19,8 @@ cpuid(unsigned int op, unsigned int *a, unsigned int *b, unsigned int *c, : a (op)); } -int -get_cpuid(char *buffer, size_t sz) +static int +__get_cpuid(char *buffer, size_t sz, const char *fmt) { unsigned int a, b, c, d, lvl; int family = -1, model = -1, step = -1; @@ -48,7 +48,7 @@ get_cpuid(char *buffer, size_t sz) if (family = 0x6) model += ((a 16) 0xf) 4; } - nb = scnprintf(buffer, sz, %s,%u,%u,%u$, vendor, family, model, step); + nb = scnprintf(buffer, sz, fmt, vendor, family, model, step); /* look for end marker to ensure the entire data fit */ if (strchr(buffer, '$')) { @@ -57,3 +57,21 @@ get_cpuid(char *buffer, size_t sz) } return -1; } + +int +get_cpuid(char *buffer, size_t sz) +{ + return __get_cpuid(buffer, sz, %s,%u,%u,%u$); +} + +char * +get_cpuid_str(void) +{ + char *buf = malloc(128); + + if (__get_cpuid(buf, 128, %s-%u-%X$) 0) { + free(buf); + return NULL; + } + return buf; +} -- 1.8.3.1 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v15 11/19] perf, tools: Add a --no-desc flag to perf list
From: Andi Kleen a...@linux.intel.com Add a --no-desc flag to perf list to not print the event descriptions that were earlier added for JSON events. This may be useful to get a less crowded listing. It's still default to print descriptions as that is the more useful default for most users. Signed-off-by: Andi Kleen a...@linux.intel.com Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com Acked-by: Jiri Olsa jo...@redhat.com --- v2: Rename --quiet to --no-desc. Add option to man page. --- tools/perf/Documentation/perf-list.txt | 8 +++- tools/perf/builtin-list.c | 12 tools/perf/util/parse-events.c | 4 ++-- tools/perf/util/parse-events.h | 2 +- tools/perf/util/pmu.c | 4 ++-- tools/perf/util/pmu.h | 2 +- 6 files changed, 21 insertions(+), 11 deletions(-) diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index bada893..9507552 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -8,13 +8,19 @@ perf-list - List all symbolic event types SYNOPSIS [verse] -'perf list' [hw|sw|cache|tracepoint|pmu|event_glob] +'perf list' [--no-desc] [hw|sw|cache|tracepoint|pmu|event_glob] DESCRIPTION --- This command displays the symbolic event types which can be selected in the various perf commands with the -e option. +OPTIONS +--- +--no-desc:: +Don't print descriptions. + + [[EVENT_MODIFIERS]] EVENT MODIFIERS --- diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index af5bd05..3f058f7 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -16,16 +16,20 @@ #include util/pmu.h #include util/parse-options.h +static bool desc_flag = true; + int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) { int i; bool raw_dump = false; struct option list_options[] = { OPT_BOOLEAN(0, raw-dump, raw_dump, Dump raw events), + OPT_BOOLEAN('d', desc, desc_flag, + Print extra event descriptions. 
--no-desc to not print.), OPT_END() }; const char * const list_usage[] = { - perf list [hw|sw|cache|tracepoint|pmu|event_glob], + perf list [--no-desc] [hw|sw|cache|tracepoint|pmu|event_glob], NULL }; @@ -40,7 +44,7 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) printf(\nList of pre-defined events (to be used in -e):\n\n); if (argc == 0) { - print_events(NULL, raw_dump); + print_events(NULL, raw_dump, !desc_flag); return 0; } @@ -59,13 +63,13 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) strcmp(argv[i], hwcache) == 0) print_hwcache_events(NULL, raw_dump); else if (strcmp(argv[i], pmu) == 0) - print_pmu_events(NULL, raw_dump); + print_pmu_events(NULL, raw_dump, !desc_flag); else { char *sep = strchr(argv[i], ':'), *s; int sep_idx; if (sep == NULL) { - print_events(argv[i], raw_dump); + print_events(argv[i], raw_dump, !desc_flag); continue; } sep_idx = sep - argv[i]; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 2a4d1ec..65f7572 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1521,7 +1521,7 @@ out_enomem: /* * Print the help text for the event symbols: */ -void print_events(const char *event_glob, bool name_only) +void print_events(const char *event_glob, bool name_only, bool quiet_flag) { print_symbol_events(event_glob, PERF_TYPE_HARDWARE, event_symbols_hw, PERF_COUNT_HW_MAX, name_only); @@ -1531,7 +1531,7 @@ void print_events(const char *event_glob, bool name_only) print_hwcache_events(event_glob, name_only); - print_pmu_events(event_glob, name_only); + print_pmu_events(event_glob, name_only, quiet_flag); if (event_glob != NULL) return; diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 131f29b..d11f854 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -133,7 +133,7 @@ void parse_events_update_lists(struct list_head *list_event, void 
parse_events_evlist_error(struct parse_events_evlist *data, int idx, const char *str); -void print_events(const char *event_glob, bool name_only); +void print_events(const char *event_glob, bool name_only, bool quiet
[PATCH v15 00/19] perf, tools: Add support for PMU events in JSON format
CPUs support a large number of performance monitoring events (PMU events) and often these events are very specific to an architecture/model of the CPU. To use most of these PMU events with perf, we currently have to identify them by their raw codes: perf stat -e r100f2 sleep 1 This patchset allows architectures to specify these PMU events in JSON files located in 'tools/perf/pmu-events/arch/' of the mainline tree. The events from the JSON files for the architecture are then built into the perf binary. At run time, perf identifies the specific set of events for the CPU and creates event aliases. These aliases allow users to specify events by name as: perf stat -e pm_1plus_ppc_cmpl sleep 1 The file, 'tools/perf/pmu-events/README' in [PATCH 19/19] gives more details. Note: - All known events tables for the architecture are included in the perf binary. - For architectures that don't have any JSON files, an empty mapping table is created and they should continue to build) Thanks to input from Andi Kleen, Jiri Olsa, Namhyung Kim and Ingo Molnar. These patches are available from: https://github.com:sukadev/linux.git Branch Description -- json-v15Source Code only json-files-5x86 and Powerpc datafiles only json-v15-with-data Both code and data (build/test) NOTE: Only source code patches (i.e those in json-v15) are being emailed. Please pull the data files from the json-files-5 branch. Changelog[v15] Code changes: - Fix 'perf list' usage string and update man page. - Remove a redundant __maybe_unused tag. - Rebase to recent perf/core branch. Data files updates: json-files-5 branch - Rebase to perf/intel-json-files-5 from Andi Kleen - Add patch from Madhavan Srinivasan for couple more Powerpc models Changelog[v14] Comments from Jiri Olsa: - Change parameter name/type for pmu_add_cpu_aliases (from void *data to list_head *head) - Use asprintf() in file_name_to_tablename() and simplify/reorg code. 
- Use __weak definition from linux/compile.h - Use fopen() with mode w and eliminate unlink() - Remove minor TODO. - Add error check for return value from strdup() in print_pmu_events(). - Move independent changes from patches 3,11,12 .. to separate patches for easier review/backport. - Clarify mapfile's header line support in patch description. - Fix build failure with DEBUG=1 Comment from Andi Kleen: - In tools/perf/pmu-events/Build, check for 'mapfile.csv' rather than 'mapfile*' Misc: - Minor changes/clarifications to tools/perf/pmu-events/README. Changelog[v13] Version: Individual patches have their own history :-) that I am preserving. Patchset version (v13) is for overall patchset and is somewhat arbitrary. - Added support for categories of events to perf - Add mapfile, jevents build dependency on pmu-events.c - Silence jevents when parsing JSON files unless V=1 is specified - Cleanup error messages - Fix memory leak with -cpuid - Rebase to Arnaldo's tree - Allow overriding CPUID via environment variable - Support long descriptions for events - Handle header line in mapfile.csv - Cleanup JSON files (trim PublicDescription if identical to/prefix of BriefDescription field) *** BLURB HERE *** Andi Kleen (10): perf, tools: Add jsmn `jasmine' JSON parser perf, tools, jevents: Program to convert JSON file to C style file perf, tools: Allow events with dot perf, tools: Support CPU id matching for x86 v2 perf, tools: Support alias descriptions perf, tools: Query terminal width and use in perf list perf, tools: Add a --no-desc flag to perf list perf, tools: Add override support for event list CPUID perf, tools: Add support for event list topics perf, tools: Handle header line in mapfile Sukadev Bhattiprolu (9): Use __weak definition from linux/compiler.h perf, tools: Split perf_pmu__new_alias() perf, tools: Use pmu_events table to create aliases perf, tools: Support CPU ID matching for Powerpc perf, tools, jevents: Add support for long descriptions perf, tools: Add 
alias support for long descriptions perf, tools: Support long descriptions with perf list perf, tools, jevents: Add support for event topics perf, tools: Add README for info on parsing JSON/map files tools/perf/Documentation/perf-list.txt | 12 +- tools/perf/Makefile.perf | 25 +- tools/perf/arch/powerpc/util/header.c | 11 + tools/perf/arch/x86/util/header.c | 24 +- tools/perf/builtin-list.c | 17 +- tools/perf/pmu-events/Build
[PATCH v15 09/19] perf, tools: Support alias descriptions
From: Andi Kleen a...@linux.intel.com Add support to print alias descriptions in perf list, which are taken from the generated event files. The sorting code is changed to put the events with descriptions at the end. The descriptions are printed as possibly multiple word wrapped lines. Example output: % perf list ... arith.fpu_div [Divide operations executed] arith.fpu_div_active [Cycles when divider is busy executing divide operations] Signed-off-by: Andi Kleen a...@linux.intel.com Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com Acked-by: Jiri Olsa jo...@redhat.com --- Changelog - Delete a redundant free() Changelog[v14] - [Jiri Olsa] Fail, rather than continue if strdup() returns NULL; remove unnecessary __maybe_unused. --- tools/perf/util/pmu.c | 82 +-- tools/perf/util/pmu.h | 1 + 2 files changed, 67 insertions(+), 16 deletions(-) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 7863d05..083cbc6 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -209,7 +209,7 @@ static int perf_pmu__parse_snapshot(struct perf_pmu_alias *alias, } static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name, -char *desc __maybe_unused, char *val) +char *desc, char *val) { struct perf_pmu_alias *alias; int ret; @@ -241,6 +241,8 @@ static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name, perf_pmu__parse_snapshot(alias, dir, name); } + alias-desc = desc ? 
strdup(desc) : NULL; + list_add_tail(alias-list, list); return 0; @@ -989,11 +991,42 @@ static char *format_alias_or(char *buf, int len, struct perf_pmu *pmu, return buf; } -static int cmp_string(const void *a, const void *b) +struct pair { + char *name; + char *desc; +}; + +static int cmp_pair(const void *a, const void *b) +{ + const struct pair *as = a; + const struct pair *bs = b; + + /* Put extra events last */ + if (!!as-desc != !!bs-desc) + return !!as-desc - !!bs-desc; + return strcmp(as-name, bs-name); +} + +static void wordwrap(char *s, int start, int max, int corr) { - const char * const *as = a; - const char * const *bs = b; - return strcmp(*as, *bs); + int column = start; + int n; + + while (*s) { + int wlen = strcspn(s, \t); + + if (column + wlen = max column start) { + printf(\n%*s, start, ); + column = start + corr; + } + n = printf(%s%.*s, column start ? : , wlen, s); + if (n = 0) + break; + s += wlen; + column += n; + while (isspace(*s)) + s++; + } } void print_pmu_events(const char *event_glob, bool name_only) @@ -1003,7 +1036,9 @@ void print_pmu_events(const char *event_glob, bool name_only) char buf[1024]; int printed = 0; int len, j; - char **aliases; + struct pair *aliases; + int numdesc = 0; + int columns = 78; pmu = NULL; len = 0; @@ -1013,14 +1048,15 @@ void print_pmu_events(const char *event_glob, bool name_only) if (pmu-selectable) len++; } - aliases = zalloc(sizeof(char *) * len); + aliases = zalloc(sizeof(struct pair) * len); if (!aliases) goto out_enomem; pmu = NULL; j = 0; while ((pmu = perf_pmu__scan(pmu)) != NULL) { list_for_each_entry(alias, pmu-aliases, list) { - char *name = format_alias(buf, sizeof(buf), pmu, alias); + char *name = alias-desc ? 
alias-name : + format_alias(buf, sizeof(buf), pmu, alias); bool is_cpu = !strcmp(pmu-name, cpu); if (event_glob != NULL @@ -1029,37 +1065,51 @@ void print_pmu_events(const char *event_glob, bool name_only) event_glob continue; - if (is_cpu !name_only) + if (is_cpu !name_only !alias-desc) name = format_alias_or(buf, sizeof(buf), pmu, alias); - aliases[j] = strdup(name); - if (aliases[j] == NULL) + aliases[j].name = name; + if (is_cpu !name_only !alias-desc) + aliases[j].name = format_alias_or(buf, sizeof(buf), + pmu, alias); + aliases[j].name = strdup(aliases[j].name
[PATCH v15 14/19] perf, tools: Add alias support for long descriptions
Previously we were dropping the useful longer descriptions that some events have in the event list completely. Now that jevents provides support for longer descriptions (see previous patch), add support for parsing the long descriptions Signed-off-by: Andi Kleen a...@linux.intel.com Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com Acked-by: Jiri Olsa jo...@redhat.com --- Changelog[v14] - [Jiri Olsa] Break up independent parts of the patch into separate patches. --- tools/perf/util/parse-events.c | 5 +++-- tools/perf/util/parse-events.h | 3 ++- tools/perf/util/pmu.c | 15 ++- tools/perf/util/pmu.h | 4 +++- 4 files changed, 18 insertions(+), 9 deletions(-) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 65f7572..c4ee41d 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1521,7 +1521,8 @@ out_enomem: /* * Print the help text for the event symbols: */ -void print_events(const char *event_glob, bool name_only, bool quiet_flag) +void print_events(const char *event_glob, bool name_only, bool quiet_flag, + bool long_desc) { print_symbol_events(event_glob, PERF_TYPE_HARDWARE, event_symbols_hw, PERF_COUNT_HW_MAX, name_only); @@ -1531,7 +1532,7 @@ void print_events(const char *event_glob, bool name_only, bool quiet_flag) print_hwcache_events(event_glob, name_only); - print_pmu_events(event_glob, name_only, quiet_flag); + print_pmu_events(event_glob, name_only, quiet_flag, long_desc); if (event_glob != NULL) return; diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index d11f854..5c93814 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -133,7 +133,8 @@ void parse_events_update_lists(struct list_head *list_event, void parse_events_evlist_error(struct parse_events_evlist *data, int idx, const char *str); -void print_events(const char *event_glob, bool name_only, bool quiet); +void print_events(const char *event_glob, bool name_only, bool 
quiet, + bool long_desc); struct event_symbol { const char *symbol; diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 2a7abac..61c86c8 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -210,7 +210,7 @@ static int perf_pmu__parse_snapshot(struct perf_pmu_alias *alias, } static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name, -char *desc, char *val) +char *desc, char *val, char *long_desc) { struct perf_pmu_alias *alias; int ret; @@ -243,6 +243,8 @@ static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name, } alias-desc = desc ? strdup(desc) : NULL; + alias-long_desc = long_desc ? strdup(long_desc) : + desc ? strdup(desc) : NULL; list_add_tail(alias-list, list); @@ -260,7 +262,7 @@ static int perf_pmu__new_alias(struct list_head *list, char *dir, char *name, FI buf[ret] = 0; - return __perf_pmu__new_alias(list, dir, name, NULL, buf); + return __perf_pmu__new_alias(list, dir, name, NULL, buf, NULL); } static inline bool pmu_alias_info_file(char *name) @@ -508,7 +510,8 @@ static int pmu_add_cpu_aliases(struct list_head *head) /* need type casts to override 'const' */ __perf_pmu__new_alias(head, NULL, (char *)pe-name, - (char *)pe-desc, (char *)pe-event); + (char *)pe-desc, (char *)pe-event, + (char *)pe-long_desc); } out: @@ -1036,7 +1039,8 @@ static void wordwrap(char *s, int start, int max, int corr) } } -void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag) +void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag, + bool long_desc) { struct perf_pmu *pmu; struct perf_pmu_alias *alias; @@ -1083,7 +1087,8 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag) if (!aliases[j].name) goto out_enomem; - aliases[j].desc = alias-desc; + aliases[j].desc = long_desc ? 
alias-long_desc : + alias-desc; j++; } if (pmu-selectable) { diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 9966c1a..10e981c 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -39,6 +39,7 @@ struct perf_pmu_info { struct perf_pmu_alias { char *name; char *desc; + char *long_desc
[PATCH v15 02/19] perf, tools, jevents: Program to convert JSON file to C style file
From: Andi Kleen a...@linux.intel.com This is a modified version of an earlier patch by Andi Kleen. We expect architectures to describe the performance monitoring events for each CPU in a corresponding JSON file, which look like: [ { EventCode: 0x00, UMask: 0x01, EventName: INST_RETIRED.ANY, BriefDescription: Instructions retired from execution., PublicDescription: Instructions retired from execution., Counter: Fixed counter 1, CounterHTOff: Fixed counter 1, SampleAfterValue: 203, SampleAfterValue: 203, MSRIndex: 0, MSRValue: 0, TakenAlone: 0, CounterMask: 0, Invert: 0, AnyThread: 0, EdgeDetect: 0, PEBS: 0, PRECISE_STORE: 0, Errata: null, Offcore: 0 } ] We also expect the architectures to provide a mapping between individual CPUs to their JSON files. Eg: GenuineIntel-6-1E,V1,/NHM-EP/NehalemEP_core_V1.json,core which maps each CPU, identified by [vendor, family, model, version, type] to a JSON file. Given these files, the program, jevents:: - locates all JSON files for the architecture, - parses each JSON file and generates a C-style PMU-events table (pmu-events.c) - locates a mapfile for the architecture - builds a global table, mapping each model of CPU to the corresponding PMU-events table. The 'pmu-events.c' is generated when building perf and added to libperf.a. The global table pmu_events_map[] table in this pmu-events.c will be used in perf in a follow-on patch. If the architecture does not have any JSON files or there is an error in processing them, an empty mapping file is created. This would allow the build of perf to proceed even if we are not able to provide aliases for events. The parser for JSON files allows parsing Intel style JSON event files. This allows to use an Intel event list directly with perf. The Intel event lists can be quite large and are too big to store in unswappable kernel memory. The conversion from JSON to C-style is straight forward. 
The parser knows (very little) Intel specific information, and can be easily extended to handle fields for other CPUs. The parser code is partially shared with an independent parsing library, which is 2-clause BSD licenced. To avoid any conflicts I marked those files as BSD licenced too. As part of perf they become GPLv2. Signed-off-by: Andi Kleen a...@linux.intel.com Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com Acked-by: Jiri Olsa jo...@redhat.com --- v2: Address review feedback. Rename option to --event-files v3: Add JSON example v4: Update manpages. v5: Don't remove dot in fixname. Fix compile error. Add include protection. Comment realloc. v6: Include debug/util.h v7: (Sukadev Bhattiprolu) Rebase to 4.0 and fix some conflicts. v8: (Sukadev Bhattiprolu) Move jevents.[hc] to tools/perf/pmu-events/ Rewrite to locate and process arch specific JSON and map files; and generate a C file. (Removed acked-by Namhyung Kim due to modest changes to patch) Compile the generated pmu-events.c and add the pmu-events.o to libperf.a v9: [Sukadev Bhattiprolu/Andi Kleen] Rename -vfm to -cpuid and use that field to encode the PVR in Power. Allow blank lines in mapfile. [Jiri Olsa] Pass ARCH as a parameter to jevents so we don't have to detect it. [Jiri Olsa] Use the infrastrastructure to build pmu-events/perf (Makefile changes from Jiri included in this patch). [Jiri Olsa, Andi Kleen] Detect changes to JSON files and rebuild pmu-events.o only if necessary. v11:- [Andi Kleen] Add mapfile, jevents dependency on pmu-events.c - [Jiri Olsa] Be silient if arch doesn't have JSON files - Also silence 'jevents' when parsing JSON files unless V=1 is specified during build. Cleanup error messages. v14:- - [Jiri Olsa] Fix compile error with DEBUG=1; drop unlink() and use w mode with fopen(); simplify file_name_to_table_name() v15:- Fix minor conflict in tools/perf/Makefile.perf when rebasing to recent perf/core. 
--- tools/perf/Makefile.perf | 25 +- tools/perf/pmu-events/Build| 11 + tools/perf/pmu-events/jevents.c| 686 + tools/perf/pmu-events/jevents.h| 17 + tools/perf/pmu-events/json.h | 3 + tools/perf/pmu-events/pmu-events.h | 35 ++ 6 files changed, 773 insertions(+), 4 deletions(-) create mode 100644 tools/perf/pmu-events/Build create mode 100644 tools/perf/pmu-events/jevents.c create mode 100644 tools/perf/pmu-events/jevents.h create mode 100644 tools/perf/pmu-events/pmu-events.h diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index b1dfcd8..26c90ea 100644 --- a/tools/perf/Makefile.perf +++ b/tools
[PATCH v15 10/19] perf, tools: Query terminal width and use in perf list
From: Andi Kleen a...@linux.intel.com Automatically adapt the now wider and word wrapped perf list output to wider terminals. This requires querying the terminal before the auto pager takes over, and exporting this information from the pager subsystem. Signed-off-by: Andi Kleen a...@linux.intel.com Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com Acked-by: Namhyung Kim namhy...@kernel.org Acked-by: Jiri Olsa jo...@redhat.com --- tools/perf/util/cache.h | 1 + tools/perf/util/pager.c | 15 +++ tools/perf/util/pmu.c | 3 ++- 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h index c861373..8e0d4b8 100644 --- a/tools/perf/util/cache.h +++ b/tools/perf/util/cache.h @@ -32,6 +32,7 @@ extern const char *perf_config_dirname(const char *, const char *); extern void setup_pager(void); extern int pager_in_use(void); extern int pager_use_color; +int pager_get_columns(void); char *alias_lookup(const char *alias); int split_cmdline(char *cmdline, const char ***argv); diff --git a/tools/perf/util/pager.c b/tools/perf/util/pager.c index 53ef006..1770c88 100644 --- a/tools/perf/util/pager.c +++ b/tools/perf/util/pager.c @@ -1,6 +1,7 @@ #include cache.h #include run-command.h #include sigchain.h +#include sys/ioctl.h /* * This is split up from the rest of git so that we can do @@ -8,6 +9,7 @@ */ static int spawned_pager; +static int pager_columns; static void pager_preexec(void) { @@ -47,9 +49,12 @@ static void wait_for_pager_signal(int signo) void setup_pager(void) { const char *pager = getenv(PERF_PAGER); + struct winsize sz; if (!isatty(1)) return; + if (ioctl(1, TIOCGWINSZ, sz) == 0) + pager_columns = sz.ws_col; if (!pager) pager = getenv(PAGER); if (!(pager || access(/usr/bin/pager, X_OK))) @@ -93,3 +98,13 @@ int pager_in_use(void) env = getenv(PERF_PAGER_IN_USE); return env ? 
perf_config_bool(PERF_PAGER_IN_USE, env) : 0; } + +int pager_get_columns(void) +{ + char *s; + + s = getenv(COLUMNS); + if (s) + return atoi(s); + return (pager_columns ? pager_columns : 80) - 2; +} diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 083cbc6..f7feb96 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -14,6 +14,7 @@ #include cpumap.h #include header.h #include pmu-events/pmu-events.h +#include cache.h struct perf_pmu_format { char *name; @@ -1038,7 +1039,7 @@ void print_pmu_events(const char *event_glob, bool name_only) int len, j; struct pair *aliases; int numdesc = 0; - int columns = 78; + int columns = pager_get_columns(); pmu = NULL; len = 0; -- 1.8.3.1 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v15 15/19] perf, tools: Support long descriptions with perf list
Previously we were dropping the useful longer descriptions that some events have in the event list completely. This patch makes them appear with perf list. Old perf list: baclears: baclears.all [Counts the number of baclears] vs new: perf list -v: ... baclears: baclears.all [The BACLEARS event counts the number of times the front end is resteered, mainly when the Branch Prediction Unit cannot provide a correct prediction and this is corrected by the Branch Address Calculator at the front end. The BACLEARS.ANY event counts the number of baclears for any type of branch] Signed-off-by: Andi Kleen a...@linux.intel.com Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com Acked-by: Jiri Olsa jo...@redhat.com --- Changelog[v15] - [Jir Olsa, Andi Kleen] Fix usage strings; update man page. Changelog[v14] - [Jiri Olsa] Break up independent parts of the patch into separate patches. --- tools/perf/Documentation/perf-list.txt | 6 +- tools/perf/builtin-list.c | 13 + 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index 9507552..48202f2 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -8,7 +8,7 @@ perf-list - List all symbolic event types SYNOPSIS [verse] -'perf list' [--no-desc] [hw|sw|cache|tracepoint|pmu|event_glob] +'perf list' [--no-desc] [--long-desc] [hw|sw|cache|tracepoint|pmu|event_glob] DESCRIPTION --- @@ -20,6 +20,10 @@ OPTIONS --no-desc:: Don't print descriptions. +-v:: +--long-desc:: +Print longer event descriptions. 
+ [[EVENT_MODIFIERS]] EVENT MODIFIERS diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index 3f058f7..f800927 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -22,14 +22,17 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) { int i; bool raw_dump = false; + bool long_desc_flag = false; struct option list_options[] = { OPT_BOOLEAN(0, raw-dump, raw_dump, Dump raw events), OPT_BOOLEAN('d', desc, desc_flag, Print extra event descriptions. --no-desc to not print.), + OPT_BOOLEAN('v', long-desc, long_desc_flag, + Print longer event descriptions.), OPT_END() }; const char * const list_usage[] = { - perf list [--no-desc] [hw|sw|cache|tracepoint|pmu|event_glob], + perf list [options] [hw|sw|cache|tracepoint|pmu|event_glob], NULL }; @@ -44,7 +47,7 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) printf(\nList of pre-defined events (to be used in -e):\n\n); if (argc == 0) { - print_events(NULL, raw_dump, !desc_flag); + print_events(NULL, raw_dump, !desc_flag, long_desc_flag); return 0; } @@ -63,13 +66,15 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) strcmp(argv[i], hwcache) == 0) print_hwcache_events(NULL, raw_dump); else if (strcmp(argv[i], pmu) == 0) - print_pmu_events(NULL, raw_dump, !desc_flag); + print_pmu_events(NULL, raw_dump, !desc_flag, + long_desc_flag); else { char *sep = strchr(argv[i], ':'), *s; int sep_idx; if (sep == NULL) { - print_events(argv[i], raw_dump, !desc_flag); + print_events(argv[i], raw_dump, !desc_flag, + long_desc_flag); continue; } sep_idx = sep - argv[i]; -- 1.8.3.1 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v15 04/19] perf, tools: Split perf_pmu__new_alias()
Separate the event parsing code in perf_pmu__new_alias() out into a separate function __perf_pmu__new_alias() so that code can be called independently. This is based on an earlier patch from Andi Kleen. Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com Acked-by: Jiri Olsa jo...@redhat.com --- tools/perf/util/pmu.c | 42 +++--- 1 file changed, 27 insertions(+), 15 deletions(-) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index c6b16b1..7bcb8c3 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -206,17 +206,12 @@ static int perf_pmu__parse_snapshot(struct perf_pmu_alias *alias, return 0; } -static int perf_pmu__new_alias(struct list_head *list, char *dir, char *name, FILE *file) +static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name, +char *desc __maybe_unused, char *val) { struct perf_pmu_alias *alias; - char buf[256]; int ret; - ret = fread(buf, 1, sizeof(buf), file); - if (ret == 0) - return -EINVAL; - buf[ret] = 0; - alias = malloc(sizeof(*alias)); if (!alias) return -ENOMEM; @@ -226,26 +221,43 @@ static int perf_pmu__new_alias(struct list_head *list, char *dir, char *name, FI alias-unit[0] = '\0'; alias-per_pkg = false; - ret = parse_events_terms(alias-terms, buf); + ret = parse_events_terms(alias-terms, val); if (ret) { + pr_err(Cannot parse alias %s: %d\n, val, ret); free(alias); return ret; } alias-name = strdup(name); - /* -* load unit name and scale if available -*/ - perf_pmu__parse_unit(alias, dir, name); - perf_pmu__parse_scale(alias, dir, name); - perf_pmu__parse_per_pkg(alias, dir, name); - perf_pmu__parse_snapshot(alias, dir, name); + if (dir) { + /* +* load unit name and scale if available +*/ + perf_pmu__parse_unit(alias, dir, name); + perf_pmu__parse_scale(alias, dir, name); + perf_pmu__parse_per_pkg(alias, dir, name); + perf_pmu__parse_snapshot(alias, dir, name); + } list_add_tail(alias-list, list); return 0; } +static int perf_pmu__new_alias(struct list_head *list, char *dir, char
*name, FILE *file) +{ + char buf[256]; + int ret; + + ret = fread(buf, 1, sizeof(buf), file); + if (ret == 0) + return -EINVAL; + + buf[ret] = 0; + + return __perf_pmu__new_alias(list, dir, name, NULL, buf); +} + static inline bool pmu_alias_info_file(char *name) { size_t len; -- 1.8.3.1 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v15 06/19] perf, tools: Support CPU ID matching for Powerpc
Implement code that returns the generic CPU ID string for Powerpc. This will be used to identify the specific table of PMU events to parse/compare user specified events against. Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com Acked-by: Jiri Olsa jo...@redhat.com --- Changelog[v14] - [Jiri Olsa] Move this independent code off into a separate patch. --- tools/perf/arch/powerpc/util/header.c | 11 +++ 1 file changed, 11 insertions(+) diff --git a/tools/perf/arch/powerpc/util/header.c b/tools/perf/arch/powerpc/util/header.c index 6c1b8a7..65f9391 100644 --- a/tools/perf/arch/powerpc/util/header.c +++ b/tools/perf/arch/powerpc/util/header.c @@ -32,3 +32,14 @@ get_cpuid(char *buffer, size_t sz) } return -1; } + +char * +get_cpuid_str(void) +{ + char *bufp; + + if (asprintf(bufp, %.8lx, mfspr(SPRN_PVR)) 0) + bufp = NULL; + + return bufp; +} -- 1.8.3.1 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v15 17/19] perf, tools: Add support for event list topics
From: Andi Kleen a...@linux.intel.com Add support to group the output of perf list by the Topic field in the JSON file. Example output: % perf list ... Cache: l1d.replacement [L1D data line replacements] l1d_pend_miss.pending [L1D miss oustandings duration in cycles] l1d_pend_miss.pending_cycles [Cycles with L1D load Misses outstanding] l2_l1d_wb_rqsts.all [Not rejected writebacks from L1D to L2 cache lines in any state] l2_l1d_wb_rqsts.hit_e [Not rejected writebacks from L1D to L2 cache lines in E state] l2_l1d_wb_rqsts.hit_m [Not rejected writebacks from L1D to L2 cache lines in M state] ... Pipeline: arith.fpu_div [Divide operations executed] arith.fpu_div_active [Cycles when divider is busy executing divide operations] baclears.any [Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end] br_inst_exec.all_branches [Speculative and retired branches] br_inst_exec.all_conditional [Speculative and retired macro-conditional branches] br_inst_exec.all_direct_jmp [Speculative and retired macro-unconditional branches excluding calls and indirects] br_inst_exec.all_direct_near_call [Speculative and retired direct near calls] br_inst_exec.all_indirect_jump_non_call_ret Signed-off-by: Andi Kleen a...@linux.intel.com Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com Acked-by: Jiri Olsa jo...@redhat.com --- Changelog[v14] - [Jiri Olsa] Move jevents support for Topic to a separate patch. 
--- tools/perf/util/pmu.c | 37 +++-- tools/perf/util/pmu.h | 1 + 2 files changed, 28 insertions(+), 10 deletions(-) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 61c86c8..7849498 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -210,7 +210,8 @@ static int perf_pmu__parse_snapshot(struct perf_pmu_alias *alias, } static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name, -char *desc, char *val, char *long_desc) +char *desc, char *val, char *long_desc, +char *topic) { struct perf_pmu_alias *alias; int ret; @@ -245,6 +246,7 @@ static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name, alias-desc = desc ? strdup(desc) : NULL; alias-long_desc = long_desc ? strdup(long_desc) : desc ? strdup(desc) : NULL; + alias-topic = topic ? strdup(topic) : NULL; list_add_tail(alias-list, list); @@ -262,7 +264,7 @@ static int perf_pmu__new_alias(struct list_head *list, char *dir, char *name, FI buf[ret] = 0; - return __perf_pmu__new_alias(list, dir, name, NULL, buf, NULL); + return __perf_pmu__new_alias(list, dir, name, NULL, buf, NULL, NULL); } static inline bool pmu_alias_info_file(char *name) @@ -511,7 +513,7 @@ static int pmu_add_cpu_aliases(struct list_head *head) /* need type casts to override 'const' */ __perf_pmu__new_alias(head, NULL, (char *)pe-name, (char *)pe-desc, (char *)pe-event, - (char *)pe-long_desc); + (char *)pe-long_desc, (char *)pe-topic); } out: @@ -1001,19 +1003,26 @@ static char *format_alias_or(char *buf, int len, struct perf_pmu *pmu, return buf; } -struct pair { +struct sevent { char *name; char *desc; + char *topic; }; -static int cmp_pair(const void *a, const void *b) +static int cmp_sevent(const void *a, const void *b) { - const struct pair *as = a; - const struct pair *bs = b; + const struct sevent *as = a; + const struct sevent *bs = b; /* Put extra events last */ if (!!as-desc != !!bs-desc) return !!as-desc - !!bs-desc; + if (as-topic bs-topic) { + int n = strcmp(as-topic, 
bs-topic); + + if (n) + return n; + } return strcmp(as-name, bs-name); } @@ -1047,9 +1056,10 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag, char buf[1024]; int printed = 0; int len, j; - struct pair *aliases; + struct sevent *aliases; int numdesc = 0; int columns = pager_get_columns(); + char *topic = NULL; pmu = NULL; len = 0; @@ -1059,7 +1069,7 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag, if (pmu-selectable) len++; } - aliases = zalloc(sizeof(struct pair) * len); + aliases = zalloc(sizeof(struct sevent) * len
[PATCH v15 05/19] perf, tools: Use pmu_events table to create aliases
At run time (when 'perf' is starting up), locate the specific table of PMU events that corresponds to the current CPU. Using that table, create aliases for each of the PMU events in the CPU. Then use these aliases to parse the user specified perf event. In short this would allow the user to specify events using their aliases rather than raw event codes. Based on input and some earlier patches from Andi Kleen, Jiri Olsa. Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com Acked-by: Jiri Olsa jo...@redhat.com --- Changelog[v4] - Split off unrelated code into separate patches. Changelog[v3] - [Jiri Olsa] Fix memory leak in cpuid Changelog[v2] - [Andi Kleen] Replace pmu_events_map-vfm with a generic cpuid. --- tools/perf/util/header.h | 1 + tools/perf/util/pmu.c| 61 2 files changed, 62 insertions(+) diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index d4d5796..996e899 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -157,4 +157,5 @@ int write_padded(int fd, const void *bf, size_t count, size_t count_aligned); */ int get_cpuid(char *buffer, size_t sz); +char *get_cpuid_str(void); #endif /* __PERF_HEADER_H */ diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 7bcb8c3..7863d05 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -12,6 +12,8 @@ #include pmu.h #include parse-events.h #include cpumap.h +#include header.h +#include pmu-events/pmu-events.h struct perf_pmu_format { char *name; @@ -449,6 +451,62 @@ static struct cpu_map *pmu_cpumask(const char *name) return cpus; } +/* + * Return the CPU id as a raw string. + * + * Each architecture should provide a more precise id string that + * can be use to match the architecture's mapfile. + */ +char * __weak get_cpuid_str(void) +{ + return NULL; +} + +/* + * From the pmu_events_map, find the table of PMU events that corresponds + * to the current running CPU. Then, add all PMU events from that table + * as aliases. 
+ */ +static int pmu_add_cpu_aliases(struct list_head *head) +{ + int i; + struct pmu_events_map *map; + struct pmu_event *pe; + char *cpuid; + + cpuid = get_cpuid_str(); + if (!cpuid) + return 0; + + i = 0; + while (1) { + map = pmu_events_map[i++]; + if (!map-table) + goto out; + + if (!strcmp(map-cpuid, cpuid)) + break; + } + + /* +* Found a matching PMU events table. Create aliases +*/ + i = 0; + while (1) { + pe = map-table[i++]; + if (!pe-name) + break; + + /* need type casts to override 'const' */ + __perf_pmu__new_alias(head, NULL, (char *)pe-name, + (char *)pe-desc, (char *)pe-event); + } + +out: + free(cpuid); + return 0; +} + struct perf_event_attr * __weak perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused) { @@ -477,6 +535,9 @@ static struct perf_pmu *pmu_lookup(const char *name) if (pmu_aliases(name, aliases)) return NULL; + if (!strcmp(name, cpu)) + (void)pmu_add_cpu_aliases(aliases); + if (pmu_type(name, type)) return NULL; -- 1.8.3.1 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v15 07/19] perf, tools: Allow events with dot
From: Andi Kleen a...@linux.intel.com The Intel events use a dot to separate event name and unit mask. Allow dot in names in the scanner, and remove special handling of dot as EOF. Also remove the hack in jevents to replace dot with underscore. This way dotted events can be specified directly by the user. I'm not fully sure this change to the scanner is correct (what was the dot special case good for?), but I haven't found anything that breaks with it so far at least. Signed-off-by: Andi Kleen a...@linux.intel.com Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com Acked-by: Namhyung Kim namhy...@kernel.org Acked-by: Jiri Olsa jo...@redhat.com --- V2: Add the dot to name too, to handle events outside cpu// --- tools/perf/util/parse-events.l | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 09e738f..13cef3c 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -119,8 +119,8 @@ event [^,{}/]+ num_dec[0-9]+ num_hex0x[a-fA-F0-9]+ num_raw_hex[a-fA-F0-9]+ -name [a-zA-Z_*?][a-zA-Z0-9_*?]* -name_minus [a-zA-Z_*?][a-zA-Z0-9\-_*?]* +name [a-zA-Z_*?][a-zA-Z0-9_*?.]* +name_minus [a-zA-Z_*?][a-zA-Z0-9\-_*?.]* /* If you add a modifier you need to update check_modifier() */ modifier_event [ukhpGHSDI]+ modifier_bp[rwx]{1,3} @@ -165,7 +165,6 @@ modifier_bp [rwx]{1,3} return PE_EVENT_NAME; } -. | EOF{ BEGIN(INITIAL); REWIND(0); -- 1.8.3.1 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v15 16/19] perf, tools, jevents: Add support for event topics
Allow assigning categories Topics field to the PMU events i.e. process the topic field from the JSON file and add a corresponding topic field to the generated C events tables. Signed-off-by: Andi Kleen a...@linux.intel.com Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com Acked-by: Jiri Olsa jo...@redhat.com --- Changelog[v14] [Jiri Olsa] Move this independent code off into a separate patch. --- tools/perf/pmu-events/jevents.c| 12 +--- tools/perf/pmu-events/jevents.h| 2 +- tools/perf/pmu-events/pmu-events.h | 1 + 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index a8507c9..ea3474b 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -203,7 +203,7 @@ static void print_events_table_prefix(FILE *fp, const char *tblname) } static int print_events_table_entry(void *data, char *name, char *event, - char *desc, char *long_desc) + char *desc, char *long_desc, char *topic) { FILE *outfp = data; /* @@ -217,6 +217,8 @@ static int print_events_table_entry(void *data, char *name, char *event, fprintf(outfp, \t.desc = \%s\,\n, desc); if (long_desc long_desc[0]) fprintf(outfp, \t.long_desc = \%s\,\n, long_desc); + if (topic) + fprintf(outfp, \t.topic = \%s\,\n, topic); fprintf(outfp, },\n); @@ -238,7 +240,7 @@ static void print_events_table_suffix(FILE *outfp) /* Call func with each event in the json file */ int json_events(const char *fn, int (*func)(void *data, char *name, char *event, char *desc, - char *long_desc), + char *long_desc, char *topic), void *data) { int err = -EIO; @@ -259,6 +261,7 @@ int json_events(const char *fn, char *event = NULL, *desc = NULL, *name = NULL; char *long_desc = NULL; char *extra_desc = NULL; + char *topic = NULL; struct msrmap *msr = NULL; jsmntok_t *msrval = NULL; jsmntok_t *precise = NULL; @@ -297,6 +300,8 @@ int json_events(const char *fn, !json_streq(map, val, null)) { addfield(map, extra_desc, . 
, Spec update: , val); + } else if (json_streq(map, field, Topic)) { + addfield(map, topic, , , val); } else if (json_streq(map, field, Data_LA) nz) { addfield(map, extra_desc, . , Supports address when precise, @@ -320,12 +325,13 @@ int json_events(const char *fn, addfield(map, event, ,, msr-pname, msrval); fixname(name); - err = func(data, name, event, desc, long_desc); + err = func(data, name, event, desc, long_desc, topic); free(event); free(desc); free(name); free(long_desc); free(extra_desc); + free(topic); if (err) break; tok += j; diff --git a/tools/perf/pmu-events/jevents.h b/tools/perf/pmu-events/jevents.h index b0eb274..9ffcb89 100644 --- a/tools/perf/pmu-events/jevents.h +++ b/tools/perf/pmu-events/jevents.h @@ -3,7 +3,7 @@ int json_events(const char *fn, int (*func)(void *data, char *name, char *event, char *desc, - char *long_desc), + char *long_desc, char *topic), void *data); char *get_cpu_str(void); diff --git a/tools/perf/pmu-events/pmu-events.h b/tools/perf/pmu-events/pmu-events.h index 711f049..6b69f4b 100644 --- a/tools/perf/pmu-events/pmu-events.h +++ b/tools/perf/pmu-events/pmu-events.h @@ -9,6 +9,7 @@ struct pmu_event { const char *event; const char *desc; const char *long_desc; + const char *topic; }; /* -- 1.8.3.1 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v15 03/19] Use __weak definition from linux/compiler.h
Jiri Olsa pointed out that the linux/compiler.h defines the attribute '__weak'. We might as well use that. Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com Acked-by: Jiri Olsa jo...@redhat.com --- tools/perf/util/pmu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 0fcc624..c6b16b1 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -1,4 +1,5 @@ #include linux/list.h +#include linux/compiler.h #include sys/types.h #include unistd.h #include stdio.h @@ -436,7 +437,7 @@ static struct cpu_map *pmu_cpumask(const char *name) return cpus; } -struct perf_event_attr *__attribute__((weak)) +struct perf_event_attr * __weak perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused) { return NULL; -- 1.8.3.1 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v15 18/19] perf, tools: Handle header line in mapfile
From: Andi Kleen a...@linux.intel.com To work with existing mapfiles, assume that the first line in 'mapfile.csv' is a header line and skip over it. Signed-off-by: Andi Kleen a...@linux.intel.com Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com Acked-by: Jiri Olsa jo...@redhat.com --- Changelog[v2] All architectures may not use the Family to identify. So, assume first line is header. --- tools/perf/pmu-events/jevents.c | 9 +++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index ea3474b..7347cca 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -462,7 +462,12 @@ static int process_mapfile(FILE *outfp, char *fpath) print_mapping_table_prefix(outfp); - line_num = 0; + /* Skip first line (header) */ + p = fgets(line, n, mapfp); + if (!p) + goto out; + + line_num = 1; while (1) { char *cpuid, *version, *type, *fname; @@ -506,8 +511,8 @@ static int process_mapfile(FILE *outfp, char *fpath) fprintf(outfp, },\n); } +out: print_mapping_table_suffix(outfp); - return 0; } -- 1.8.3.1 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v15 01/19] perf, tools: Add jsmn `jasmine' JSON parser
From: Andi Kleen a...@linux.intel.com I need a JSON parser. This adds the simplest JSON parser I could find -- Serge Zaitsev's jsmn `jasmine' -- to the perf library. I merely converted it to (mostly) Linux style and added support for non 0 terminated input. The parser is quite straight forward and does not copy any data, just returns tokens with offsets into the input buffer. So it's relatively efficient and simple to use. The code is not fully checkpatch clean, but I didn't want to completely fork the upstream code. Original source: http://zserge.bitbucket.org/jsmn.html In addition I added a simple wrapper that mmaps a json file and provides some straight forward access functions. Used in follow-on patches to parse event files. Acked-by: Namhyung Kim namhy...@kernel.org Acked-by: Jiri Olsa jo...@redhat.com Signed-off-by: Andi Kleen a...@linux.intel.com Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com --- v2: Address review feedback. v3: Minor checkpatch fixes. v4 (by Sukadev Bhattiprolu) - Rebase to 4.0 and fix minor conflicts in tools/perf/Makefile.perf - Report error if specified events file is invalid. v5 (Sukadev Bhattiprolu) - Move files to tools/perf/pmu-events/ since parsing of JSON file now occurs when _building_ rather than running perf. --- tools/perf/pmu-events/jsmn.c | 313 +++ tools/perf/pmu-events/jsmn.h | 67 + tools/perf/pmu-events/json.c | 162 ++ tools/perf/pmu-events/json.h | 36 + 4 files changed, 578 insertions(+) create mode 100644 tools/perf/pmu-events/jsmn.c create mode 100644 tools/perf/pmu-events/jsmn.h create mode 100644 tools/perf/pmu-events/json.c create mode 100644 tools/perf/pmu-events/json.h diff --git a/tools/perf/pmu-events/jsmn.c b/tools/perf/pmu-events/jsmn.c new file mode 100644 index 000..11d1fa1 --- /dev/null +++ b/tools/perf/pmu-events/jsmn.c @@ -0,0 +1,313 @@ +/* + * Copyright (c) 2010 Serge A. 
Zaitsev + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the Software), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * Slightly modified by AK to not assume 0 terminated input. + */ + +#include stdlib.h +#include jsmn.h + +/* + * Allocates a fresh unused token from the token pool. + */ +static jsmntok_t *jsmn_alloc_token(jsmn_parser *parser, + jsmntok_t *tokens, size_t num_tokens) +{ + jsmntok_t *tok; + + if ((unsigned)parser-toknext = num_tokens) + return NULL; + tok = tokens[parser-toknext++]; + tok-start = tok-end = -1; + tok-size = 0; + return tok; +} + +/* + * Fills token type and boundaries. + */ +static void jsmn_fill_token(jsmntok_t *token, jsmntype_t type, + int start, int end) +{ + token-type = type; + token-start = start; + token-end = end; + token-size = 0; +} + +/* + * Fills next available token with JSON primitive. 
+ */ +static jsmnerr_t jsmn_parse_primitive(jsmn_parser *parser, const char *js, + size_t len, + jsmntok_t *tokens, size_t num_tokens) +{ + jsmntok_t *token; + int start; + + start = parser-pos; + + for (; parser-pos len; parser-pos++) { + switch (js[parser-pos]) { +#ifndef JSMN_STRICT + /* +* In strict mode primitive must be followed by , +* or } or ] +*/ + case ':': +#endif + case '\t': + case '\r': + case '\n': + case ' ': + case ',': + case ']': + case '}': + goto found; + default: + break; + } + if (js[parser-pos] 32 || js[parser-pos] = 127) { + parser-pos = start; + return JSMN_ERROR_INVAL
[PATCH v15 13/19] perf, tools, jevents: Add support for long descriptions
Implement support in jevents to parse long descriptions for events that may have them in the JSON files. A follow on patch will make this long description available to user through the 'perf list' command. Signed-off-by: Andi Kleen a...@linux.intel.com Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com Acked-by: Jiri Olsa jo...@redhat.com --- Changelog[v14] - [Jiri Olsa] Break up independent parts of the patch into separate patches. --- tools/perf/pmu-events/jevents.c| 31 +++ tools/perf/pmu-events/jevents.h| 3 ++- tools/perf/pmu-events/pmu-events.h | 1 + 3 files changed, 26 insertions(+), 9 deletions(-) diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index 5f7603b..a8507c9 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -203,7 +203,7 @@ static void print_events_table_prefix(FILE *fp, const char *tblname) } static int print_events_table_entry(void *data, char *name, char *event, - char *desc) + char *desc, char *long_desc) { FILE *outfp = data; /* @@ -215,6 +215,8 @@ static int print_events_table_entry(void *data, char *name, char *event, fprintf(outfp, \t.name = \%s\,\n, name); fprintf(outfp, \t.event = \%s\,\n, event); fprintf(outfp, \t.desc = \%s\,\n, desc); + if (long_desc long_desc[0]) + fprintf(outfp, \t.long_desc = \%s\,\n, long_desc); fprintf(outfp, },\n); @@ -235,7 +237,8 @@ static void print_events_table_suffix(FILE *outfp) /* Call func with each event in the json file */ int json_events(const char *fn, - int (*func)(void *data, char *name, char *event, char *desc), + int (*func)(void *data, char *name, char *event, char *desc, + char *long_desc), void *data) { int err = -EIO; @@ -254,6 +257,8 @@ int json_events(const char *fn, tok = tokens + 1; for (i = 0; i tokens-size; i++) { char *event = NULL, *desc = NULL, *name = NULL; + char *long_desc = NULL; + char *extra_desc = NULL; struct msrmap *msr = NULL; jsmntok_t *msrval = NULL; jsmntok_t *precise = NULL; @@ -279,6 +284,9 @@ 
int json_events(const char *fn, } else if (json_streq(map, field, BriefDescription)) { addfield(map, desc, , , val); fixdesc(desc); + } else if (json_streq(map, field, PublicDescription)) { + addfield(map, long_desc, , , val); + fixdesc(long_desc); } else if (json_streq(map, field, PEBS) nz) { precise = val; } else if (json_streq(map, field, MSRIndex) nz) { @@ -287,10 +295,10 @@ int json_events(const char *fn, msrval = val; } else if (json_streq(map, field, Errata) !json_streq(map, val, null)) { - addfield(map, desc, . , + addfield(map, extra_desc, . , Spec update: , val); } else if (json_streq(map, field, Data_LA) nz) { - addfield(map, desc, . , + addfield(map, extra_desc, . , Supports address when precise, NULL); } @@ -298,19 +306,26 @@ int json_events(const char *fn, } if (precise !strstr(desc, (Precise Event))) { if (json_streq(map, precise, 2)) - addfield(map, desc, , (Must be precise), - NULL); + addfield(map, extra_desc, , + (Must be precise), NULL); else - addfield(map, desc, , + addfield(map, extra_desc, , (Precise event), NULL); } + if (desc extra_desc) + addfield(map, desc, , extra_desc, NULL); + if (long_desc extra_desc) + addfield(map, long_desc, , extra_desc, NULL); if (msr != NULL) addfield(map, event, ,, msr-pname, msrval); fixname(name); - err = func(data, name, event, desc); + + err = func(data, name, event, desc, long_desc); free(event); free(desc); free(name
[PATCH v15 12/19] perf, tools: Add override support for event list CPUID
From: Andi Kleen a...@linux.intel.com Add a PERF_CPUID variable to override the CPUID of the current CPU (within the current architecture). This is useful for testing, so that all event lists can be tested on a single system. Signed-off-by: Andi Kleen a...@linux.intel.com Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com Acked-by: Jiri Olsa jo...@redhat.com --- v2: Fix double free in earlier version. Print actual CPUID being used with verbose option. --- tools/perf/util/pmu.c | 8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index ebbd4c7..2a7abac 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -477,10 +477,16 @@ static int pmu_add_cpu_aliases(struct list_head *head) struct pmu_event *pe; char *cpuid; - cpuid = get_cpuid_str(); + cpuid = getenv(PERF_CPUID); + if (cpuid) + cpuid = strdup(cpuid); + if (!cpuid) + cpuid = get_cpuid_str(); if (!cpuid) return 0; + pr_debug(Using CPUID %s\n, cpuid); + i = 0; while (1) { map = pmu_events_map[i++]; -- 1.8.3.1 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v15 19/19] perf, tools: Add README for info on parsing JSON/map files
Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com Acked-by: Jiri Olsa jo...@redhat.com --- tools/perf/pmu-events/README | 122 +++ 1 file changed, 122 insertions(+) create mode 100644 tools/perf/pmu-events/README diff --git a/tools/perf/pmu-events/README b/tools/perf/pmu-events/README new file mode 100644 index 000..da57cb5 --- /dev/null +++ b/tools/perf/pmu-events/README @@ -0,0 +1,122 @@ + +The contents of this directory allow users to specify PMU events in +their CPUs by their symbolic names rather than raw event codes (see +example below). + +The main program in this directory, is the 'jevents', which is built and +executed _before_ the perf binary itself is built. + +The 'jevents' program tries to locate and process JSON files in the directory +tree tools/perf/pmu-events/arch/foo. + + - Regular files with '.json' extension in the name are assumed to be + JSON files, each of which describes a set of PMU events. + + - Regular files with basename starting with 'mapfile.csv' are assumed + to be a CSV file that maps a specific CPU to its set of PMU events. + (see below for mapfile format) + + - Directories are traversed, but all other files are ignored. + +Using the JSON files and the mapfile, 'jevents' generates the C source file, +'pmu-events.c', which encodes the two sets of tables: + + - Set of 'PMU events tables' for all known CPUs in the architecture, + (one table like the following, per JSON file; table name 'pme_power8' + is derived from JSON file name, 'power8.json'). + + struct pmu_event pme_power8[] = { + + ... + + { + .name = pm_1plus_ppc_cmpl, + .event = event=0x100f2, + .desc = 1 or more ppc insts finished,, + }, + + ... + } + + - A 'mapping table' that maps each CPU of the architecture, to its + 'PMU events table' + + struct pmu_events_map pmu_events_map[] = { + { + .cpuid = 004b, + .version = 1, + .type = core, + .table = pme_power8 + }, + ... 
+ + }; + +After the 'pmu-events.c' is generated, it is compiled and the resulting +'pmu-events.o' is added to 'libperf.a' which is then used to build perf. + +NOTES: + 1. Several CPUs can support same set of events and hence use a common + JSON file. Hence several entries in the pmu_events_map[] could map + to a single 'PMU events table'. + + 2. The 'pmu-events.h' has an extern declaration for the mapping table + and the generated 'pmu-events.c' defines this table. + + 3. _All_ known CPU tables for architecture are included in the perf + binary. + +At run time, perf determines the actual CPU it is running on, finds the +matching events table and builds aliases for those events. This allows +users to specify events by their name: + + $ perf stat -e pm_1plus_ppc_cmpl sleep 1 + +where 'pm_1plus_ppc_cmpl' is a Power8 PMU event. + +In case of errors when processing files in the tools/perf/pmu-events/arch +directory, 'jevents' tries to create an empty mapping file to allow the perf +build to succeed even if the PMU event aliases cannot be used. + +However some errors in processing may cause the perf build to fail. + +Mapfile format +=== + +The mapfile.csv format is expected to be: + + Header line + CPUID,Version,File/path/name.json,Type + +where: + + Comma: + is the required field delimiter (i.e other fields cannot + have commas within them). + + Comments: + Lines in which the first character is either '\n' or '#' + are ignored. + + Header line + The header line is the first line in the file, which is + _IGNORED_. It can be a comment (begin with '#') or empty. + + CPUID: + CPUID is an arch-specific char string, that can be used + to identify CPU (and associate it with a set of PMU events + it supports). Multiple CPUIDS can point to the same + File/path/name.json. + + Example: + CPUID == 'GenuineIntel-6-2E' (on x86). + CPUID == '004b0100' (PVR value in Powerpc) + Version: + is the Version of the mapfile. 
+ + File/path/name.json: + is the pathname for the JSON file, relative to the directory + containing the mapfile.csv + + Type: + indicates whether the events are core or uncore events. -- 1.8.3.1 ___ Linuxppc-dev mailing list Linuxppc
[PATCH v14 06/19] perf, tools: Support CPU ID matching for Powerpc
Implement code that returns the generic CPU ID string for Powerpc. This will be used to identify the specific table of PMU events to parse/compare user specified events against. Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com Changelog[v14] - [Jiri Olsa] Move this independent code off into a separate patch. --- tools/perf/arch/powerpc/util/header.c | 11 +++ 1 file changed, 11 insertions(+) diff --git a/tools/perf/arch/powerpc/util/header.c b/tools/perf/arch/powerpc/util/header.c index 6c1b8a7..65f9391 100644 --- a/tools/perf/arch/powerpc/util/header.c +++ b/tools/perf/arch/powerpc/util/header.c @@ -32,3 +32,14 @@ get_cpuid(char *buffer, size_t sz) } return -1; } + +char * +get_cpuid_str(void) +{ + char *bufp; + + if (asprintf(bufp, %.8lx, mfspr(SPRN_PVR)) 0) + bufp = NULL; + + return bufp; +} -- 1.7.9.5 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v14 13/19] perf, tools, jevents: Add support for long descriptions
Implement support in jevents to parse long descriptions for events that may have them in the JSON files. A follow on patch will make this long description available to user through the 'perf list' command. Signed-off-by: Andi Kleen a...@linux.intel.com Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com Changelog[v14] - [Jiri Olsa] Break up independent parts of the patch into separate patches. --- tools/perf/pmu-events/jevents.c| 31 +++ tools/perf/pmu-events/jevents.h|3 ++- tools/perf/pmu-events/pmu-events.h |1 + 3 files changed, 26 insertions(+), 9 deletions(-) diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index 5f7603b..a8507c9 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -203,7 +203,7 @@ static void print_events_table_prefix(FILE *fp, const char *tblname) } static int print_events_table_entry(void *data, char *name, char *event, - char *desc) + char *desc, char *long_desc) { FILE *outfp = data; /* @@ -215,6 +215,8 @@ static int print_events_table_entry(void *data, char *name, char *event, fprintf(outfp, \t.name = \%s\,\n, name); fprintf(outfp, \t.event = \%s\,\n, event); fprintf(outfp, \t.desc = \%s\,\n, desc); + if (long_desc long_desc[0]) + fprintf(outfp, \t.long_desc = \%s\,\n, long_desc); fprintf(outfp, },\n); @@ -235,7 +237,8 @@ static void print_events_table_suffix(FILE *outfp) /* Call func with each event in the json file */ int json_events(const char *fn, - int (*func)(void *data, char *name, char *event, char *desc), + int (*func)(void *data, char *name, char *event, char *desc, + char *long_desc), void *data) { int err = -EIO; @@ -254,6 +257,8 @@ int json_events(const char *fn, tok = tokens + 1; for (i = 0; i tokens-size; i++) { char *event = NULL, *desc = NULL, *name = NULL; + char *long_desc = NULL; + char *extra_desc = NULL; struct msrmap *msr = NULL; jsmntok_t *msrval = NULL; jsmntok_t *precise = NULL; @@ -279,6 +284,9 @@ int json_events(const char *fn, } else if 
(json_streq(map, field, BriefDescription)) { addfield(map, desc, , , val); fixdesc(desc); + } else if (json_streq(map, field, PublicDescription)) { + addfield(map, long_desc, , , val); + fixdesc(long_desc); } else if (json_streq(map, field, PEBS) nz) { precise = val; } else if (json_streq(map, field, MSRIndex) nz) { @@ -287,10 +295,10 @@ int json_events(const char *fn, msrval = val; } else if (json_streq(map, field, Errata) !json_streq(map, val, null)) { - addfield(map, desc, . , + addfield(map, extra_desc, . , Spec update: , val); } else if (json_streq(map, field, Data_LA) nz) { - addfield(map, desc, . , + addfield(map, extra_desc, . , Supports address when precise, NULL); } @@ -298,19 +306,26 @@ int json_events(const char *fn, } if (precise !strstr(desc, (Precise Event))) { if (json_streq(map, precise, 2)) - addfield(map, desc, , (Must be precise), - NULL); + addfield(map, extra_desc, , + (Must be precise), NULL); else - addfield(map, desc, , + addfield(map, extra_desc, , (Precise event), NULL); } + if (desc extra_desc) + addfield(map, desc, , extra_desc, NULL); + if (long_desc extra_desc) + addfield(map, long_desc, , extra_desc, NULL); if (msr != NULL) addfield(map, event, ,, msr-pname, msrval); fixname(name); - err = func(data, name, event, desc); + + err = func(data, name, event, desc, long_desc); free(event); free(desc); free(name); + free(long_desc
[PATCH v14 11/19] perf, tools: Add a --no-desc flag to perf list
From: Andi Kleen a...@linux.intel.com Add a --no-desc flag to perf list to not print the event descriptions that were earlier added for JSON events. This may be useful to get a less crowded listing. It's still default to print descriptions as that is the more useful default for most users. Signed-off-by: Andi Kleen a...@linux.intel.com Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com v2: Rename --quiet to --no-desc. Add option to man page. --- tools/perf/Documentation/perf-list.txt |8 +++- tools/perf/builtin-list.c | 12 tools/perf/util/parse-events.c |4 ++-- tools/perf/util/parse-events.h |2 +- tools/perf/util/pmu.c |4 ++-- tools/perf/util/pmu.h |2 +- 6 files changed, 21 insertions(+), 11 deletions(-) diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index bada893..9507552 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -8,13 +8,19 @@ perf-list - List all symbolic event types SYNOPSIS [verse] -'perf list' [hw|sw|cache|tracepoint|pmu|event_glob] +'perf list' [--no-desc] [hw|sw|cache|tracepoint|pmu|event_glob] DESCRIPTION --- This command displays the symbolic event types which can be selected in the various perf commands with the -e option. +OPTIONS +--- +--no-desc:: +Don't print descriptions. + + [[EVENT_MODIFIERS]] EVENT MODIFIERS --- diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index af5bd05..3f058f7 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -16,16 +16,20 @@ #include util/pmu.h #include util/parse-options.h +static bool desc_flag = true; + int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) { int i; bool raw_dump = false; struct option list_options[] = { OPT_BOOLEAN(0, raw-dump, raw_dump, Dump raw events), + OPT_BOOLEAN('d', desc, desc_flag, + Print extra event descriptions. 
--no-desc to not print.), OPT_END() }; const char * const list_usage[] = { - perf list [hw|sw|cache|tracepoint|pmu|event_glob], + perf list [--no-desc] [hw|sw|cache|tracepoint|pmu|event_glob], NULL }; @@ -40,7 +44,7 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) printf(\nList of pre-defined events (to be used in -e):\n\n); if (argc == 0) { - print_events(NULL, raw_dump); + print_events(NULL, raw_dump, !desc_flag); return 0; } @@ -59,13 +63,13 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) strcmp(argv[i], hwcache) == 0) print_hwcache_events(NULL, raw_dump); else if (strcmp(argv[i], pmu) == 0) - print_pmu_events(NULL, raw_dump); + print_pmu_events(NULL, raw_dump, !desc_flag); else { char *sep = strchr(argv[i], ':'), *s; int sep_idx; if (sep == NULL) { - print_events(argv[i], raw_dump); + print_events(argv[i], raw_dump, !desc_flag); continue; } sep_idx = sep - argv[i]; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 2a4d1ec..65f7572 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1521,7 +1521,7 @@ out_enomem: /* * Print the help text for the event symbols: */ -void print_events(const char *event_glob, bool name_only) +void print_events(const char *event_glob, bool name_only, bool quiet_flag) { print_symbol_events(event_glob, PERF_TYPE_HARDWARE, event_symbols_hw, PERF_COUNT_HW_MAX, name_only); @@ -1531,7 +1531,7 @@ void print_events(const char *event_glob, bool name_only) print_hwcache_events(event_glob, name_only); - print_pmu_events(event_glob, name_only); + print_pmu_events(event_glob, name_only, quiet_flag); if (event_glob != NULL) return; diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 131f29b..d11f854 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -133,7 +133,7 @@ void parse_events_update_lists(struct list_head *list_event, void 
parse_events_evlist_error(struct parse_events_evlist *data, int idx, const char *str); -void print_events(const char *event_glob, bool name_only); +void print_events(const char *event_glob, bool name_only, bool quiet); struct event_symbol
[PATCH v14 05/19] perf, tools: Use pmu_events table to create aliases
At run time (when 'perf' is starting up), locate the specific table of PMU events that corresponds to the current CPU. Using that table, create aliases for the each of the PMU events in the CPU. The use these aliases to parse the user specified perf event. In short this would allow the user to specify events using their aliases rather than raw event codes. Based on input and some earlier patches from Andi Kleen, Jiri Olsa. Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com Changelog[v4] - Split off unrelated code into separate patches. Changelog[v3] - [Jiri Olsa] Fix memory leak in cpuid Changelog[v2] - [Andi Kleen] Replace pmu_events_map-vfm with a generic cpuid. --- tools/perf/util/header.h |1 + tools/perf/util/pmu.c| 61 ++ 2 files changed, 62 insertions(+) diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index d4d5796..996e899 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -157,4 +157,5 @@ int write_padded(int fd, const void *bf, size_t count, size_t count_aligned); */ int get_cpuid(char *buffer, size_t sz); +char *get_cpuid_str(void); #endif /* __PERF_HEADER_H */ diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 7bcb8c3..7863d05 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -12,6 +12,8 @@ #include pmu.h #include parse-events.h #include cpumap.h +#include header.h +#include pmu-events/pmu-events.h struct perf_pmu_format { char *name; @@ -449,6 +451,62 @@ static struct cpu_map *pmu_cpumask(const char *name) return cpus; } +/* + * Return the CPU id as a raw string. + * + * Each architecture should provide a more precise id string that + * can be use to match the architecture's mapfile. + */ +char * __weak get_cpuid_str(void) +{ + return NULL; +} + +/* + * From the pmu_events_map, find the table of PMU events that corresponds + * to the current running CPU. Then, add all PMU events from that table + * as aliases. 
+ */ +static int pmu_add_cpu_aliases(struct list_head *head) +{ + int i; + struct pmu_events_map *map; + struct pmu_event *pe; + char *cpuid; + + cpuid = get_cpuid_str(); + if (!cpuid) + return 0; + + i = 0; + while (1) { + map = pmu_events_map[i++]; + if (!map-table) + goto out; + + if (!strcmp(map-cpuid, cpuid)) + break; + } + + /* +* Found a matching PMU events table. Create aliases +*/ + i = 0; + while (1) { + pe = map-table[i++]; + if (!pe-name) + break; + + /* need type casts to override 'const' */ + __perf_pmu__new_alias(head, NULL, (char *)pe-name, + (char *)pe-desc, (char *)pe-event); + } + +out: + free(cpuid); + return 0; +} + struct perf_event_attr * __weak perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused) { @@ -477,6 +535,9 @@ static struct perf_pmu *pmu_lookup(const char *name) if (pmu_aliases(name, aliases)) return NULL; + if (!strcmp(name, cpu)) + (void)pmu_add_cpu_aliases(aliases); + if (pmu_type(name, type)) return NULL; -- 1.7.9.5 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v14 08/19] perf, tools: Support CPU id matching for x86 v2
From: Andi Kleen a...@linux.intel.com Implement the code to match CPU types to mapfile types for x86 based on CPUID. This extends an existing similar function, but changes it to use the x86 mapfile cpu description. This allows to resolve event lists generated by jevents. Signed-off-by: Andi Kleen a...@linux.intel.com Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com v2: Update to new get_cpuid_str() interface --- tools/perf/arch/x86/util/header.c | 24 +--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/tools/perf/arch/x86/util/header.c b/tools/perf/arch/x86/util/header.c index 146d12a..a74a48d 100644 --- a/tools/perf/arch/x86/util/header.c +++ b/tools/perf/arch/x86/util/header.c @@ -19,8 +19,8 @@ cpuid(unsigned int op, unsigned int *a, unsigned int *b, unsigned int *c, : a (op)); } -int -get_cpuid(char *buffer, size_t sz) +static int +__get_cpuid(char *buffer, size_t sz, const char *fmt) { unsigned int a, b, c, d, lvl; int family = -1, model = -1, step = -1; @@ -48,7 +48,7 @@ get_cpuid(char *buffer, size_t sz) if (family = 0x6) model += ((a 16) 0xf) 4; } - nb = scnprintf(buffer, sz, %s,%u,%u,%u$, vendor, family, model, step); + nb = scnprintf(buffer, sz, fmt, vendor, family, model, step); /* look for end marker to ensure the entire data fit */ if (strchr(buffer, '$')) { @@ -57,3 +57,21 @@ get_cpuid(char *buffer, size_t sz) } return -1; } + +int +get_cpuid(char *buffer, size_t sz) +{ + return __get_cpuid(buffer, sz, %s,%u,%u,%u$); +} + +char * +get_cpuid_str(void) +{ + char *buf = malloc(128); + + if (__get_cpuid(buf, 128, %s-%u-%X$) 0) { + free(buf); + return NULL; + } + return buf; +} -- 1.7.9.5 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v14 19/19] perf, tools: Add README for info on parsing JSON/map files
Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com --- tools/perf/pmu-events/README | 122 ++ 1 file changed, 122 insertions(+) create mode 100644 tools/perf/pmu-events/README diff --git a/tools/perf/pmu-events/README b/tools/perf/pmu-events/README new file mode 100644 index 000..da57cb5 --- /dev/null +++ b/tools/perf/pmu-events/README @@ -0,0 +1,122 @@ + +The contents of this directory allow users to specify PMU events in +their CPUs by their symbolic names rather than raw event codes (see +example below). + +The main program in this directory, is the 'jevents', which is built and +executed _before_ the perf binary itself is built. + +The 'jevents' program tries to locate and process JSON files in the directory +tree tools/perf/pmu-events/arch/foo. + + - Regular files with '.json' extension in the name are assumed to be + JSON files, each of which describes a set of PMU events. + + - Regular files with basename starting with 'mapfile.csv' are assumed + to be a CSV file that maps a specific CPU to its set of PMU events. + (see below for mapfile format) + + - Directories are traversed, but all other files are ignored. + +Using the JSON files and the mapfile, 'jevents' generates the C source file, +'pmu-events.c', which encodes the two sets of tables: + + - Set of 'PMU events tables' for all known CPUs in the architecture, + (one table like the following, per JSON file; table name 'pme_power8' + is derived from JSON file name, 'power8.json'). + + struct pmu_event pme_power8[] = { + + ... + + { + .name = pm_1plus_ppc_cmpl, + .event = event=0x100f2, + .desc = 1 or more ppc insts finished,, + }, + + ... + } + + - A 'mapping table' that maps each CPU of the architecture, to its + 'PMU events table' + + struct pmu_events_map pmu_events_map[] = { + { + .cpuid = 004b, + .version = 1, + .type = core, + .table = pme_power8 + }, + ... 
+ + }; + +After the 'pmu-events.c' is generated, it is compiled and the resulting +'pmu-events.o' is added to 'libperf.a' which is then used to build perf. + +NOTES: + 1. Several CPUs can support same set of events and hence use a common + JSON file. Hence several entries in the pmu_events_map[] could map + to a single 'PMU events table'. + + 2. The 'pmu-events.h' has an extern declaration for the mapping table + and the generated 'pmu-events.c' defines this table. + + 3. _All_ known CPU tables for architecture are included in the perf + binary. + +At run time, perf determines the actual CPU it is running on, finds the +matching events table and builds aliases for those events. This allows +users to specify events by their name: + + $ perf stat -e pm_1plus_ppc_cmpl sleep 1 + +where 'pm_1plus_ppc_cmpl' is a Power8 PMU event. + +In case of errors when processing files in the tools/perf/pmu-events/arch +directory, 'jevents' tries to create an empty mapping file to allow the perf +build to succeed even if the PMU event aliases cannot be used. + +However some errors in processing may cause the perf build to fail. + +Mapfile format +=== + +The mapfile.csv format is expected to be: + + Header line + CPUID,Version,File/path/name.json,Type + +where: + + Comma: + is the required field delimiter (i.e other fields cannot + have commas within them). + + Comments: + Lines in which the first character is either '\n' or '#' + are ignored. + + Header line + The header line is the first line in the file, which is + _IGNORED_. It can be a comment (begin with '#') or empty. + + CPUID: + CPUID is an arch-specific char string, that can be used + to identify CPU (and associate it with a set of PMU events + it supports). Multiple CPUIDS can point to the same + File/path/name.json. + + Example: + CPUID == 'GenuineIntel-6-2E' (on x86). + CPUID == '004b0100' (PVR value in Powerpc) + Version: + is the Version of the mapfile. 
+ + File/path/name.json: + is the pathname for the JSON file, relative to the directory + containing the mapfile.csv + + Type: + indicates whether the events are core or uncore events. -- 1.7.9.5 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https
[PATCH v14 00/19] perf, tools: Add support for PMU events in JSON format
CPUs support a large number of performance monitoring events (PMU events) and often these events are very specific to an architecture/model of the CPU. To use most of these PMU events with perf, we currently have to identify them by their raw codes: perf stat -e r100f2 sleep 1 This patchset allows architectures to specify these PMU events in JSON files located in 'tools/perf/pmu-events/arch/' of the mainline tree. The events from the JSON files for the architecture are then built into the perf binary. At run time, perf identifies the specific set of events for the CPU and creates event aliases. These aliases allow users to specify events by name as: perf stat -e pm_1plus_ppc_cmpl sleep 1 The file, 'tools/perf/pmu-events/README' in [PATCH 14/14] gives more details. Note: - All known events tables for the architecture are included in the perf binary. - For architectures that don't have any JSON files, an empty mapping table is created and they should continue to build) Thanks to input from Andi Kleen, Jiri Olsa, Namhyung Kim and Ingo Molnar. These patches are available from: https://github.com:sukadev/linux.git Branch Description -- json-v14Source Code only json-files-3x86 and Powerpc datafiles only json-v14-with-data Both code and data (build/test) NOTE: Only source code patches (i.e those in json-v14) are being emailed. Please pull the data files from the json-files-3 branch. Changelog[v14] Comments from Jiri Olsa: - Change parameter name/type for pmu_add_cpu_aliases (from void *data to list_head *head) - Use asprintf() in file_name_to_tablename() and simplify/reorg code. - Use __weak definition from linux/compile.h - Use fopen() with mode w and eliminate unlink() - Remove minor TODO. - Add error check for return value from strdup() in print_pmu_events(). - Move independent changes from patches 3,11,12 .. to separate patches for easier review/backport. - Clarify mapfile's header line support in patch description. 
- Fix build failure with DEBUG=1 Comment from Andi Kleen: - In tools/perf/pmu-events/Build, check for 'mapfile.csv' rather than 'mapfile*' Misc: - Minor changes/clarifications to tools/perf/pmu-events/README. Changelog[v13] Version: Individual patches have their own history :-) that I am preserving. Patchset version (v13) is for overall patchset and is somewhat arbitrary. - Added support for categories of events to perf - Add mapfile, jevents build dependency on pmu-events.c - Silence jevents when parsing JSON files unless V=1 is specified - Cleanup error messages - Fix memory leak with -cpuid - Rebase to Arnaldo's tree - Allow overriding CPUID via environment variable - Support long descriptions for events - Handle header line in mapfile.csv - Cleanup JSON files (trim PublicDescription if identical to/prefix of BriefDescription field) Andi Kleen (10): perf, tools: Add jsmn `jasmine' JSON parser perf, tools, jevents: Program to convert JSON file to C style file perf, tools: Allow events with dot perf, tools: Support CPU id matching for x86 v2 perf, tools: Support alias descriptions perf, tools: Query terminal width and use in perf list perf, tools: Add a --no-desc flag to perf list perf, tools: Add override support for event list CPUID perf, tools: Add support for event list topics perf, tools: Handle header line in mapfile Sukadev Bhattiprolu (9): Use __weak definition from linux/compiler.h perf, tools: Split perf_pmu__new_alias() perf, tools: Use pmu_events table to create aliases perf, tools: Support CPU ID matching for Powerpc perf, tools, jevents: Add support for long descriptions perf, tools: Add alias support for long descriptions perf, tools: Support long descriptions with perf list perf, tools, jevents: Add support for event topics perf, tools: Add README for info on parsing JSON/map files tools/perf/Documentation/perf-list.txt |8 +- tools/perf/Makefile.perf | 25 +- tools/perf/arch/powerpc/util/header.c | 11 + tools/perf/arch/x86/util/header.c | 24 +- 
tools/perf/builtin-list.c | 17 +- tools/perf/pmu-events/Build| 11 + tools/perf/pmu-events/README | 122 ++ tools/perf/pmu-events/jevents.c| 712 tools/perf/pmu-events/jevents.h| 18 + tools/perf/pmu-events/jsmn.c | 313 ++ tools/perf/pmu-events/jsmn.h | 67 +++ tools/perf/pmu-events/json.c
[PATCH v14 04/19] perf, tools: Split perf_pmu__new_alias()
Separate the event parsing code in perf_pmu__new_alias() out into a separate function __perf_pmu__new_alias() so that code can be called indepdently. This is based on an earlier patch from Andi Kleen. Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com --- tools/perf/util/pmu.c | 42 +++--- 1 file changed, 27 insertions(+), 15 deletions(-) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index c6b16b1..7bcb8c3 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -206,17 +206,12 @@ static int perf_pmu__parse_snapshot(struct perf_pmu_alias *alias, return 0; } -static int perf_pmu__new_alias(struct list_head *list, char *dir, char *name, FILE *file) +static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name, +char *desc __maybe_unused, char *val) { struct perf_pmu_alias *alias; - char buf[256]; int ret; - ret = fread(buf, 1, sizeof(buf), file); - if (ret == 0) - return -EINVAL; - buf[ret] = 0; - alias = malloc(sizeof(*alias)); if (!alias) return -ENOMEM; @@ -226,26 +221,43 @@ static int perf_pmu__new_alias(struct list_head *list, char *dir, char *name, FI alias-unit[0] = '\0'; alias-per_pkg = false; - ret = parse_events_terms(alias-terms, buf); + ret = parse_events_terms(alias-terms, val); if (ret) { + pr_err(Cannot parse alias %s: %d\n, val, ret); free(alias); return ret; } alias-name = strdup(name); - /* -* load unit name and scale if available -*/ - perf_pmu__parse_unit(alias, dir, name); - perf_pmu__parse_scale(alias, dir, name); - perf_pmu__parse_per_pkg(alias, dir, name); - perf_pmu__parse_snapshot(alias, dir, name); + if (dir) { + /* +* load unit name and scale if available +*/ + perf_pmu__parse_unit(alias, dir, name); + perf_pmu__parse_scale(alias, dir, name); + perf_pmu__parse_per_pkg(alias, dir, name); + perf_pmu__parse_snapshot(alias, dir, name); + } list_add_tail(alias-list, list); return 0; } +static int perf_pmu__new_alias(struct list_head *list, char *dir, char *name, FILE *file) +{ + char buf[256]; + 
int ret; + + ret = fread(buf, 1, sizeof(buf), file); + if (ret == 0) + return -EINVAL; + + buf[ret] = 0; + + return __perf_pmu__new_alias(list, dir, name, NULL, buf); +} + static inline bool pmu_alias_info_file(char *name) { size_t len; -- 1.7.9.5 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v14 10/19] perf, tools: Query terminal width and use in perf list
From: Andi Kleen a...@linux.intel.com Automatically adapt the now wider and word wrapped perf list output to wider terminals. This requires querying the terminal before the auto pager takes over, and exporting this information from the pager subsystem. Acked-by: Namhyung Kim namhy...@kernel.org Signed-off-by: Andi Kleen a...@linux.intel.com Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com --- tools/perf/util/cache.h |1 + tools/perf/util/pager.c | 15 +++ tools/perf/util/pmu.c |3 ++- 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h index c861373..8e0d4b8 100644 --- a/tools/perf/util/cache.h +++ b/tools/perf/util/cache.h @@ -32,6 +32,7 @@ extern const char *perf_config_dirname(const char *, const char *); extern void setup_pager(void); extern int pager_in_use(void); extern int pager_use_color; +int pager_get_columns(void); char *alias_lookup(const char *alias); int split_cmdline(char *cmdline, const char ***argv); diff --git a/tools/perf/util/pager.c b/tools/perf/util/pager.c index 53ef006..1770c88 100644 --- a/tools/perf/util/pager.c +++ b/tools/perf/util/pager.c @@ -1,6 +1,7 @@ #include cache.h #include run-command.h #include sigchain.h +#include sys/ioctl.h /* * This is split up from the rest of git so that we can do @@ -8,6 +9,7 @@ */ static int spawned_pager; +static int pager_columns; static void pager_preexec(void) { @@ -47,9 +49,12 @@ static void wait_for_pager_signal(int signo) void setup_pager(void) { const char *pager = getenv(PERF_PAGER); + struct winsize sz; if (!isatty(1)) return; + if (ioctl(1, TIOCGWINSZ, sz) == 0) + pager_columns = sz.ws_col; if (!pager) pager = getenv(PAGER); if (!(pager || access(/usr/bin/pager, X_OK))) @@ -93,3 +98,13 @@ int pager_in_use(void) env = getenv(PERF_PAGER_IN_USE); return env ? perf_config_bool(PERF_PAGER_IN_USE, env) : 0; } + +int pager_get_columns(void) +{ + char *s; + + s = getenv(COLUMNS); + if (s) + return atoi(s); + return (pager_columns ? 
pager_columns : 80) - 2; +} diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index e377598..443086e 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -14,6 +14,7 @@ #include cpumap.h #include header.h #include pmu-events/pmu-events.h +#include cache.h struct perf_pmu_format { char *name; @@ -1038,7 +1039,7 @@ void print_pmu_events(const char *event_glob, bool name_only) int len, j; struct pair *aliases; int numdesc = 0; - int columns = 78; + int columns = pager_get_columns(); pmu = NULL; len = 0; -- 1.7.9.5 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v14 14/19] perf, tools: Add alias support for long descriptions
Previously we were dropping the useful longer descriptions that some events have in the event list completely. Now that jevents provides support for longer descriptions (see previous patch), add support for parsing the long descriptions Signed-off-by: Andi Kleen a...@linux.intel.com Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com Changelog[v14] - [Jiri Olsa] Break up independent parts of the patch into separate patches. --- tools/perf/util/parse-events.c |5 +++-- tools/perf/util/parse-events.h |3 ++- tools/perf/util/pmu.c | 16 +++- tools/perf/util/pmu.h |4 +++- 4 files changed, 19 insertions(+), 9 deletions(-) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 65f7572..c4ee41d 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1521,7 +1521,8 @@ out_enomem: /* * Print the help text for the event symbols: */ -void print_events(const char *event_glob, bool name_only, bool quiet_flag) +void print_events(const char *event_glob, bool name_only, bool quiet_flag, + bool long_desc) { print_symbol_events(event_glob, PERF_TYPE_HARDWARE, event_symbols_hw, PERF_COUNT_HW_MAX, name_only); @@ -1531,7 +1532,7 @@ void print_events(const char *event_glob, bool name_only, bool quiet_flag) print_hwcache_events(event_glob, name_only); - print_pmu_events(event_glob, name_only, quiet_flag); + print_pmu_events(event_glob, name_only, quiet_flag, long_desc); if (event_glob != NULL) return; diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index d11f854..5c93814 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -133,7 +133,8 @@ void parse_events_update_lists(struct list_head *list_event, void parse_events_evlist_error(struct parse_events_evlist *data, int idx, const char *str); -void print_events(const char *event_glob, bool name_only, bool quiet); +void print_events(const char *event_glob, bool name_only, bool quiet, + bool long_desc); struct 
event_symbol { const char *symbol; diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 300975e..05653ec 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -210,7 +210,8 @@ static int perf_pmu__parse_snapshot(struct perf_pmu_alias *alias, } static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name, -char *desc __maybe_unused, char *val) +char *desc __maybe_unused, char *val, +char *long_desc) { struct perf_pmu_alias *alias; int ret; @@ -243,6 +244,8 @@ static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name, } alias-desc = desc ? strdup(desc) : NULL; + alias-long_desc = long_desc ? strdup(long_desc) : + desc ? strdup(desc) : NULL; list_add_tail(alias-list, list); @@ -260,7 +263,7 @@ static int perf_pmu__new_alias(struct list_head *list, char *dir, char *name, FI buf[ret] = 0; - return __perf_pmu__new_alias(list, dir, name, NULL, buf); + return __perf_pmu__new_alias(list, dir, name, NULL, buf, NULL); } static inline bool pmu_alias_info_file(char *name) @@ -508,7 +511,8 @@ static int pmu_add_cpu_aliases(struct list_head *head) /* need type casts to override 'const' */ __perf_pmu__new_alias(head, NULL, (char *)pe-name, - (char *)pe-desc, (char *)pe-event); + (char *)pe-desc, (char *)pe-event, + (char *)pe-long_desc); } out: @@ -1036,7 +1040,8 @@ static void wordwrap(char *s, int start, int max, int corr) } } -void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag) +void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag, + bool long_desc) { struct perf_pmu *pmu; struct perf_pmu_alias *alias; @@ -1083,7 +1088,8 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag) if (!aliases[j].name) goto out_enomem; - aliases[j].desc = alias-desc; + aliases[j].desc = long_desc ? 
alias-long_desc : + alias-desc; j++; } if (pmu-selectable) { diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 9966c1a..10e981c 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -39,6 +39,7 @@ struct perf_pmu_info { struct perf_pmu_alias { char *name; char *desc
[PATCH v14 02/19] perf, tools, jevents: Program to convert JSON file to C style file
From: Andi Kleen a...@linux.intel.com This is a modified version of an earlier patch by Andi Kleen. We expect architectures to describe the performance monitoring events for each CPU in a corresponding JSON file, which look like: [ { EventCode: 0x00, UMask: 0x01, EventName: INST_RETIRED.ANY, BriefDescription: Instructions retired from execution., PublicDescription: Instructions retired from execution., Counter: Fixed counter 1, CounterHTOff: Fixed counter 1, SampleAfterValue: 203, SampleAfterValue: 203, MSRIndex: 0, MSRValue: 0, TakenAlone: 0, CounterMask: 0, Invert: 0, AnyThread: 0, EdgeDetect: 0, PEBS: 0, PRECISE_STORE: 0, Errata: null, Offcore: 0 } ] We also expect the architectures to provide a mapping between individual CPUs to their JSON files. Eg: GenuineIntel-6-1E,V1,/NHM-EP/NehalemEP_core_V1.json,core which maps each CPU, identified by [vendor, family, model, version, type] to a JSON file. Given these files, the program, jevents:: - locates all JSON files for the architecture, - parses each JSON file and generates a C-style PMU-events table (pmu-events.c) - locates a mapfile for the architecture - builds a global table, mapping each model of CPU to the corresponding PMU-events table. The 'pmu-events.c' is generated when building perf and added to libperf.a. The global table pmu_events_map[] table in this pmu-events.c will be used in perf in a follow-on patch. If the architecture does not have any JSON files or there is an error in processing them, an empty mapping file is created. This would allow the build of perf to proceed even if we are not able to provide aliases for events. The parser for JSON files allows parsing Intel style JSON event files. This allows to use an Intel event list directly with perf. The Intel event lists can be quite large and are too big to store in unswappable kernel memory. The conversion from JSON to C-style is straight forward. 
The parser knows (very little) Intel specific information, and can be easily extended to handle fields for other CPUs. The parser code is partially shared with an independent parsing library, which is 2-clause BSD licenced. To avoid any conflicts I marked those files as BSD licenced too. As part of perf they become GPLv2. Signed-off-by: Andi Kleen a...@linux.intel.com Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com v2: Address review feedback. Rename option to --event-files v3: Add JSON example v4: Update manpages. v5: Don't remove dot in fixname. Fix compile error. Add include protection. Comment realloc. v6: Include debug/util.h v7: (Sukadev Bhattiprolu) Rebase to 4.0 and fix some conflicts. v8: (Sukadev Bhattiprolu) Move jevents.[hc] to tools/perf/pmu-events/ Rewrite to locate and process arch specific JSON and map files; and generate a C file. (Removed acked-by Namhyung Kim due to modest changes to patch) Compile the generated pmu-events.c and add the pmu-events.o to libperf.a v9: [Sukadev Bhattiprolu/Andi Kleen] Rename -vfm to -cpuid and use that field to encode the PVR in Power. Allow blank lines in mapfile. [Jiri Olsa] Pass ARCH as a parameter to jevents so we don't have to detect it. [Jiri Olsa] Use the infrastrastructure to build pmu-events/perf (Makefile changes from Jiri included in this patch). [Jiri Olsa, Andi Kleen] Detect changes to JSON files and rebuild pmu-events.o only if necessary. v11:- [Andi Kleen] Add mapfile, jevents dependency on pmu-events.c - [Jiri Olsa] Be silient if arch doesn't have JSON files - Also silence 'jevents' when parsing JSON files unless V=1 is specified during build. Cleanup error messages. 
v14:- - [Jiri Olsa] Fix compile error with DEBUG=1; drop unlink() and use w mode with fopen(); simplify file_name_to_table_name() --- tools/perf/Makefile.perf | 25 +- tools/perf/pmu-events/Build| 11 + tools/perf/pmu-events/jevents.c| 686 tools/perf/pmu-events/jevents.h| 17 + tools/perf/pmu-events/json.h |3 + tools/perf/pmu-events/pmu-events.h | 35 ++ 6 files changed, 773 insertions(+), 4 deletions(-) create mode 100644 tools/perf/pmu-events/Build create mode 100644 tools/perf/pmu-events/jevents.c create mode 100644 tools/perf/pmu-events/jevents.h create mode 100644 tools/perf/pmu-events/pmu-events.h diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 5816a3b..6a50fc1 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -272,14 +272,29 @@ strip: $(PROGRAMS) $(OUTPUT)perf PERF_IN := $(OUTPUT)perf-in.o +JEVENTS := $(OUTPUT)pmu
[PATCH v14 01/19] perf, tools: Add jsmn `jasmine' JSON parser
From: Andi Kleen a...@linux.intel.com I need a JSON parser. This adds the simplest JSON parser I could find -- Serge Zaitsev's jsmn `jasmine' -- to the perf library. I merely converted it to (mostly) Linux style and added support for non 0 terminated input. The parser is quite straight forward and does not copy any data, just returns tokens with offsets into the input buffer. So it's relatively efficient and simple to use. The code is not fully checkpatch clean, but I didn't want to completely fork the upstream code. Original source: http://zserge.bitbucket.org/jsmn.html In addition I added a simple wrapper that mmaps a json file and provides some straight forward access functions. Used in follow-on patches to parse event files. Acked-by: Namhyung Kim namhy...@kernel.org Signed-off-by: Andi Kleen a...@linux.intel.com Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com --- v2: Address review feedback. v3: Minor checkpatch fixes. v4 (by Sukadev Bhattiprolu) - Rebase to 4.0 and fix minor conflicts in tools/perf/Makefile.perf - Report error if specified events file is invalid. v5 (Sukadev Bhattiprolu) - Move files to tools/perf/pmu-events/ since parsing of JSON file now occurs when _building_ rather than running perf. --- tools/perf/pmu-events/jsmn.c | 313 ++ tools/perf/pmu-events/jsmn.h | 67 + tools/perf/pmu-events/json.c | 162 ++ tools/perf/pmu-events/json.h | 36 + 4 files changed, 578 insertions(+) create mode 100644 tools/perf/pmu-events/jsmn.c create mode 100644 tools/perf/pmu-events/jsmn.h create mode 100644 tools/perf/pmu-events/json.c create mode 100644 tools/perf/pmu-events/json.h diff --git a/tools/perf/pmu-events/jsmn.c b/tools/perf/pmu-events/jsmn.c new file mode 100644 index 000..11d1fa1 --- /dev/null +++ b/tools/perf/pmu-events/jsmn.c @@ -0,0 +1,313 @@ +/* + * Copyright (c) 2010 Serge A. 
Zaitsev + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the Software), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * Slightly modified by AK to not assume 0 terminated input. + */ + +#include stdlib.h +#include jsmn.h + +/* + * Allocates a fresh unused token from the token pool. + */ +static jsmntok_t *jsmn_alloc_token(jsmn_parser *parser, + jsmntok_t *tokens, size_t num_tokens) +{ + jsmntok_t *tok; + + if ((unsigned)parser-toknext = num_tokens) + return NULL; + tok = tokens[parser-toknext++]; + tok-start = tok-end = -1; + tok-size = 0; + return tok; +} + +/* + * Fills token type and boundaries. + */ +static void jsmn_fill_token(jsmntok_t *token, jsmntype_t type, + int start, int end) +{ + token-type = type; + token-start = start; + token-end = end; + token-size = 0; +} + +/* + * Fills next available token with JSON primitive. 
+ */ +static jsmnerr_t jsmn_parse_primitive(jsmn_parser *parser, const char *js, + size_t len, + jsmntok_t *tokens, size_t num_tokens) +{ + jsmntok_t *token; + int start; + + start = parser-pos; + + for (; parser-pos len; parser-pos++) { + switch (js[parser-pos]) { +#ifndef JSMN_STRICT + /* +* In strict mode primitive must be followed by , +* or } or ] +*/ + case ':': +#endif + case '\t': + case '\r': + case '\n': + case ' ': + case ',': + case ']': + case '}': + goto found; + default: + break; + } + if (js[parser-pos] 32 || js[parser-pos] = 127) { + parser-pos = start; + return JSMN_ERROR_INVAL; + } + } +#ifdef