On Sat, Jul 18, 2015 at 11:30:10AM +0300, Max Filippov wrote:
> The Xtensa Performance Monitor Module has up to eight 32-bit wide performance
> counters. Each counter may be enabled independently and can count any
> single type of hardware performance event. Event counting may be enabled
> and disabled globally (per PMM).
> Each counter has a status register with bits indicating whether the counter
> has overflowed, and it may be programmed to raise a profiling IRQ on overflow.
> This IRQ is used to rewind the counters, so that counting events can
> accumulate more than 2^32 samples, and to report samples for sampling events.
>
> For more details see the Tensilica Debug User's Guide, chapter 8
> "Performance monitor module".
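To make the rewind/accumulate idea above concrete, here is a minimal user-space sketch (not part of the patch; fold_counter and soft_count are made-up names) of how a free-running 32-bit counter can feed a 64-bit software count: each read or overflow IRQ folds in the modulo-2^32 delta since the last snapshot, much as xtensa_perf_event_update() does with XTENSA_PMU_COUNTER_MASK, so the total can grow well past 2^32.

#include <stdint.h>
#include <stdio.h>

static uint64_t soft_count;	/* 64-bit software accumulator */
static uint32_t prev_raw;	/* last snapshot of the 32-bit hardware counter */

/* Fold the current raw counter value into the 64-bit count; in the driver
 * this happens from the overflow IRQ and from the ->read()/->stop() paths. */
static void fold_counter(uint32_t new_raw)
{
	uint32_t delta = new_raw - prev_raw;	/* wraps modulo 2^32 */

	prev_raw = new_raw;
	soft_count += delta;
}

int main(void)
{
	fold_counter(0xfffffff0);	/* counter just below the 32-bit limit */
	fold_counter(0x00000010);	/* counter wrapped; delta is still 0x20 */
	printf("%llu\n", (unsigned long long)soft_count);	/* 4294967312 */
	return 0;
}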
Has this gone via PeterZ?

I added the tools/ bits in my perf/core branch, will go in next pull req,

- Arnaldo

> Cc: Peter Zijlstra <[email protected]>
> Cc: Paul Mackerras <[email protected]>
> Cc: Ingo Molnar <[email protected]>
> Cc: Arnaldo Carvalho de Melo <[email protected]>
> Signed-off-by: Max Filippov <[email protected]>
> ---
> Changes v1->v2:
> - use -EINVAL instead of -ENOENT for invalid PMU event configurations.
>
>  arch/xtensa/Kconfig             |  10 +
>  arch/xtensa/kernel/Makefile     |   1 +
>  arch/xtensa/kernel/perf_event.c | 450
> ++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 461 insertions(+)
>  create mode 100644 arch/xtensa/kernel/perf_event.c
>
> diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
> index 3c57934..0e92885 100644
> --- a/arch/xtensa/Kconfig
> +++ b/arch/xtensa/Kconfig
> @@ -126,6 +126,16 @@ config XTENSA_VARIANT_MMU
>  	  Build a Conventional Kernel with full MMU support,
>  	  ie: it supports a TLB with auto-loading, page protection.
>  
> +config XTENSA_VARIANT_HAVE_PERF_EVENTS
> +	bool "Core variant has Performance Monitor Module"
> +	depends on XTENSA_VARIANT_CUSTOM
> +	default n
> +	help
> +	  Enable if core variant has Performance Monitor Module with
> +	  External Registers Interface.
> +
> +	  If unsure, say N.
> +
>  config XTENSA_UNALIGNED_USER
>  	bool "Unaligned memory access in use space"
>  	help
> diff --git a/arch/xtensa/kernel/Makefile b/arch/xtensa/kernel/Makefile
> index d3a0f0f..547a757 100644
> --- a/arch/xtensa/kernel/Makefile
> +++ b/arch/xtensa/kernel/Makefile
> @@ -13,6 +13,7 @@ obj-$(CONFIG_PCI) += pci.o
>  obj-$(CONFIG_MODULES) += xtensa_ksyms.o module.o
>  obj-$(CONFIG_FUNCTION_TRACER) += mcount.o
>  obj-$(CONFIG_SMP) += smp.o mxhead.o
> +obj-$(CONFIG_XTENSA_VARIANT_HAVE_PERF_EVENTS) += perf_event.o
>  
>  AFLAGS_head.o += -mtext-section-literals
>  
> diff --git a/arch/xtensa/kernel/perf_event.c b/arch/xtensa/kernel/perf_event.c
> new file mode 100644
> index 0000000..b44df3c
> --- /dev/null
> +++ b/arch/xtensa/kernel/perf_event.c
> @@ -0,0 +1,450 @@
> +/*
> + * Xtensa Performance Monitor Module driver
> + * See Tensilica Debug User's Guide for PMU registers documentation.
> + *
> + * Copyright (C) 2015 Cadence Design Systems Inc.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#include <linux/interrupt.h>
> +#include <linux/irqdomain.h>
> +#include <linux/module.h>
> +#include <linux/of.h>
> +#include <linux/perf_event.h>
> +#include <linux/platform_device.h>
> +
> +#include <asm/processor.h>
> +#include <asm/stacktrace.h>
> +
> +/* Global control/status for all perf counters */
> +#define XTENSA_PMU_PMG			0x1000
> +/* Perf counter values */
> +#define XTENSA_PMU_PM(i)		(0x1080 + (i) * 4)
> +/* Perf counter control registers */
> +#define XTENSA_PMU_PMCTRL(i)		(0x1100 + (i) * 4)
> +/* Perf counter status registers */
> +#define XTENSA_PMU_PMSTAT(i)		(0x1180 + (i) * 4)
> +
> +#define XTENSA_PMU_PMG_PMEN		0x1
> +
> +#define XTENSA_PMU_COUNTER_MASK		0xffffffffULL
> +#define XTENSA_PMU_COUNTER_MAX		0x7fffffff
> +
> +#define XTENSA_PMU_PMCTRL_INTEN		0x00000001
> +#define XTENSA_PMU_PMCTRL_KRNLCNT	0x00000008
> +#define XTENSA_PMU_PMCTRL_TRACELEVEL	0x000000f0
> +#define XTENSA_PMU_PMCTRL_SELECT_SHIFT	8
> +#define XTENSA_PMU_PMCTRL_SELECT	0x00001f00
> +#define XTENSA_PMU_PMCTRL_MASK_SHIFT	16
> +#define XTENSA_PMU_PMCTRL_MASK		0xffff0000
> +
> +#define XTENSA_PMU_MASK(select, mask) \
> +	(((select) << XTENSA_PMU_PMCTRL_SELECT_SHIFT) | \
> +	 ((mask) << XTENSA_PMU_PMCTRL_MASK_SHIFT) | \
> +	 XTENSA_PMU_PMCTRL_TRACELEVEL | \
> +	 XTENSA_PMU_PMCTRL_INTEN)
> +
> +#define XTENSA_PMU_PMSTAT_OVFL		0x00000001
> +#define XTENSA_PMU_PMSTAT_INTASRT	0x00000010
> +
> +struct xtensa_pmu_events {
> +	/* Array of events currently on this core */
> +	struct perf_event *event[XCHAL_NUM_PERF_COUNTERS];
> +	/* Bitmap of used hardware counters */
> +	unsigned long used_mask[BITS_TO_LONGS(XCHAL_NUM_PERF_COUNTERS)];
> +};
> +static DEFINE_PER_CPU(struct xtensa_pmu_events, xtensa_pmu_events);
> +
> +static const u32 xtensa_hw_ctl[] = {
> +	[PERF_COUNT_HW_CPU_CYCLES]		= XTENSA_PMU_MASK(0, 0x1),
> +	[PERF_COUNT_HW_INSTRUCTIONS]		= XTENSA_PMU_MASK(2, 0xffff),
> +	[PERF_COUNT_HW_CACHE_REFERENCES]	= XTENSA_PMU_MASK(10, 0x1),
> +	[PERF_COUNT_HW_CACHE_MISSES]		= XTENSA_PMU_MASK(12, 0x1),
> +	/* Taken and non-taken branches + taken loop ends */
> +	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= XTENSA_PMU_MASK(2, 0x490),
> +	/* Instruction-related + other global stall cycles */
> +	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]	= XTENSA_PMU_MASK(4, 0x1ff),
> +	/* Data-related global stall cycles */
> +	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND]	= XTENSA_PMU_MASK(3, 0x1ff),
> +};
> +
> +#define C(_x)	PERF_COUNT_HW_CACHE_##_x
> +
> +static const u32 xtensa_cache_ctl[][C(OP_MAX)][C(RESULT_MAX)] = {
> +	[C(L1D)] = {
> +		[C(OP_READ)] = {
> +			[C(RESULT_ACCESS)]	= XTENSA_PMU_MASK(10, 0x1),
> +			[C(RESULT_MISS)]	= XTENSA_PMU_MASK(10, 0x2),
> +		},
> +		[C(OP_WRITE)] = {
> +			[C(RESULT_ACCESS)]	= XTENSA_PMU_MASK(11, 0x1),
> +			[C(RESULT_MISS)]	= XTENSA_PMU_MASK(11, 0x2),
> +		},
> +	},
> +	[C(L1I)] = {
> +		[C(OP_READ)] = {
> +			[C(RESULT_ACCESS)]	= XTENSA_PMU_MASK(8, 0x1),
> +			[C(RESULT_MISS)]	= XTENSA_PMU_MASK(8, 0x2),
> +		},
> +	},
> +	[C(DTLB)] = {
> +		[C(OP_READ)] = {
> +			[C(RESULT_ACCESS)]	= XTENSA_PMU_MASK(9, 0x1),
> +			[C(RESULT_MISS)]	= XTENSA_PMU_MASK(9, 0x8),
> +		},
> +	},
> +	[C(ITLB)] = {
> +		[C(OP_READ)] = {
> +			[C(RESULT_ACCESS)]	= XTENSA_PMU_MASK(7, 0x1),
> +			[C(RESULT_MISS)]	= XTENSA_PMU_MASK(7, 0x8),
> +		},
> +	},
> +};
> +
> +static int xtensa_pmu_cache_event(u64 config)
> +{
> +	unsigned int cache_type, cache_op, cache_result;
> +	int ret;
> +
> +	cache_type = (config >>  0) & 0xff;
> +	cache_op = (config >>  8) & 0xff;
> +	cache_result = (config >> 16) & 0xff;
> +
> +	if (cache_type >= ARRAY_SIZE(xtensa_cache_ctl) ||
> +	    cache_op >= C(OP_MAX) ||
> +	    cache_result >= C(RESULT_MAX))
> +		return -EINVAL;
> +
> +	ret = xtensa_cache_ctl[cache_type][cache_op][cache_result];
> +
> +	if (ret == 0)
> +		return -EINVAL;
> +
> +	return ret;
> +}
> +
> +static inline uint32_t xtensa_pmu_read_counter(int idx)
> +{
> +	return get_er(XTENSA_PMU_PM(idx));
> +}
> +
> +static inline void xtensa_pmu_write_counter(int idx, uint32_t v)
> +{
> +	set_er(v, XTENSA_PMU_PM(idx));
> +}
> +
> +static void xtensa_perf_event_update(struct perf_event *event,
> +				     struct hw_perf_event *hwc, int idx)
> +{
> +	uint64_t prev_raw_count, new_raw_count;
> +	int64_t delta;
> +
> +	do {
> +		prev_raw_count = local64_read(&hwc->prev_count);
> +		new_raw_count = xtensa_pmu_read_counter(event->hw.idx);
> +	} while (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
> +				 new_raw_count) != prev_raw_count);
> +
> +	delta = (new_raw_count - prev_raw_count) & XTENSA_PMU_COUNTER_MASK;
> +
> +	local64_add(delta, &event->count);
> +	local64_sub(delta, &hwc->period_left);
> +}
> +
> +static bool xtensa_perf_event_set_period(struct perf_event *event,
> +					 struct hw_perf_event *hwc, int idx)
> +{
> +	bool rc = false;
> +	s64 left;
> +
> +	if (!is_sampling_event(event)) {
> +		left = XTENSA_PMU_COUNTER_MAX;
> +	} else {
> +		s64 period = hwc->sample_period;
> +
> +		left = local64_read(&hwc->period_left);
> +		if (left <= -period) {
> +			left = period;
> +			local64_set(&hwc->period_left, left);
> +			hwc->last_period = period;
> +			rc = true;
> +		} else if (left <= 0) {
> +			left += period;
> +			local64_set(&hwc->period_left, left);
> +			hwc->last_period = period;
> +			rc = true;
> +		}
> +		if (left > XTENSA_PMU_COUNTER_MAX)
> +			left = XTENSA_PMU_COUNTER_MAX;
> +	}
> +
> +	local64_set(&hwc->prev_count, -left);
> +	xtensa_pmu_write_counter(idx, -left);
> +	perf_event_update_userpage(event);
> +
> +	return rc;
> +}
> +
> +static void xtensa_pmu_enable(struct pmu *pmu)
> +{
> +	set_er(get_er(XTENSA_PMU_PMG) | XTENSA_PMU_PMG_PMEN, XTENSA_PMU_PMG);
> +}
> +
> +static void xtensa_pmu_disable(struct pmu *pmu)
> +{
> +	set_er(get_er(XTENSA_PMU_PMG) & ~XTENSA_PMU_PMG_PMEN, XTENSA_PMU_PMG);
> +}
> +
> +static int xtensa_pmu_event_init(struct perf_event *event)
> +{
> +	int ret;
> +
> +	switch (event->attr.type) {
> +	case PERF_TYPE_HARDWARE:
> +		if (event->attr.config >= ARRAY_SIZE(xtensa_hw_ctl) ||
> +		    xtensa_hw_ctl[event->attr.config] == 0)
> +			return -EINVAL;
> +		event->hw.config = xtensa_hw_ctl[event->attr.config];
> +		return 0;
> +
> +	case PERF_TYPE_HW_CACHE:
> +		ret = xtensa_pmu_cache_event(event->attr.config);
> +		if (ret < 0)
> +			return ret;
> +		event->hw.config = ret;
> +		return 0;
> +
> +	case PERF_TYPE_RAW:
> +		/* Not 'previous counter' select */
> +		if ((event->attr.config & XTENSA_PMU_PMCTRL_SELECT) ==
> +		    (1 << XTENSA_PMU_PMCTRL_SELECT_SHIFT))
> +			return -EINVAL;
> +		event->hw.config = (event->attr.config &
> +				    (XTENSA_PMU_PMCTRL_KRNLCNT |
> +				     XTENSA_PMU_PMCTRL_TRACELEVEL |
> +				     XTENSA_PMU_PMCTRL_SELECT |
> +				     XTENSA_PMU_PMCTRL_MASK)) |
> +			XTENSA_PMU_PMCTRL_INTEN;
> +		return 0;
> +
> +	default:
> +		return -ENOENT;
> +	}
> +}
> +
> +/*
> + * Starts/Stops a counter present on the PMU. The PMI handler
> + * should stop the counter when perf_event_overflow() returns
> + * !0. ->start() will be used to continue.
> + */
> +static void xtensa_pmu_start(struct perf_event *event, int flags)
> +{
> +	struct hw_perf_event *hwc = &event->hw;
> +	int idx = hwc->idx;
> +
> +	if (WARN_ON_ONCE(idx == -1))
> +		return;
> +
> +	if (flags & PERF_EF_RELOAD) {
> +		WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
> +		xtensa_perf_event_set_period(event, hwc, idx);
> +	}
> +
> +	hwc->state = 0;
> +
> +	set_er(hwc->config, XTENSA_PMU_PMCTRL(idx));
> +}
> +
> +static void xtensa_pmu_stop(struct perf_event *event, int flags)
> +{
> +	struct hw_perf_event *hwc = &event->hw;
> +	int idx = hwc->idx;
> +
> +	if (!(hwc->state & PERF_HES_STOPPED)) {
> +		set_er(0, XTENSA_PMU_PMCTRL(idx));
> +		set_er(get_er(XTENSA_PMU_PMSTAT(idx)),
> +		       XTENSA_PMU_PMSTAT(idx));
> +		hwc->state |= PERF_HES_STOPPED;
> +	}
> +
> +	if ((flags & PERF_EF_UPDATE) &&
> +	    !(event->hw.state & PERF_HES_UPTODATE)) {
> +		xtensa_perf_event_update(event, &event->hw, idx);
> +		event->hw.state |= PERF_HES_UPTODATE;
> +	}
> +}
> +
> +/*
> + * Adds/Removes a counter to/from the PMU, can be done inside
> + * a transaction, see the ->*_txn() methods.
> + */
> +static int xtensa_pmu_add(struct perf_event *event, int flags)
> +{
> +	struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events);
> +	struct hw_perf_event *hwc = &event->hw;
> +	int idx = hwc->idx;
> +
> +	if (__test_and_set_bit(idx, ev->used_mask)) {
> +		idx = find_first_zero_bit(ev->used_mask,
> +					  XCHAL_NUM_PERF_COUNTERS);
> +		if (idx == XCHAL_NUM_PERF_COUNTERS)
> +			return -EAGAIN;
> +
> +		__set_bit(idx, ev->used_mask);
> +		hwc->idx = idx;
> +	}
> +	ev->event[idx] = event;
> +
> +	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
> +
> +	if (flags & PERF_EF_START)
> +		xtensa_pmu_start(event, PERF_EF_RELOAD);
> +
> +	perf_event_update_userpage(event);
> +	return 0;
> +}
> +
> +static void xtensa_pmu_del(struct perf_event *event, int flags)
> +{
> +	struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events);
> +
> +	xtensa_pmu_stop(event, PERF_EF_UPDATE);
> +	__clear_bit(event->hw.idx, ev->used_mask);
> +	perf_event_update_userpage(event);
> +}
> +
> +static void xtensa_pmu_read(struct perf_event *event)
> +{
> +	xtensa_perf_event_update(event, &event->hw, event->hw.idx);
> +}
> +
> +static int callchain_trace(struct stackframe *frame, void *data)
> +{
> +	struct perf_callchain_entry *entry = data;
> +
> +	perf_callchain_store(entry, frame->pc);
> +	return 0;
> +}
> +
> +void perf_callchain_kernel(struct perf_callchain_entry *entry,
> +			   struct pt_regs *regs)
> +{
> +	xtensa_backtrace_kernel(regs, PERF_MAX_STACK_DEPTH,
> +				callchain_trace, NULL, entry);
> +}
> +
> +void perf_callchain_user(struct perf_callchain_entry *entry,
> +			 struct pt_regs *regs)
> +{
> +	xtensa_backtrace_user(regs, PERF_MAX_STACK_DEPTH,
> +			      callchain_trace, entry);
> +}
> +
> +void perf_event_print_debug(void)
> +{
> +	unsigned long flags;
> +	unsigned i;
> +
> +	local_irq_save(flags);
> +	pr_info("CPU#%d: PMG: 0x%08lx\n", smp_processor_id(),
> +		get_er(XTENSA_PMU_PMG));
> +	for (i = 0; i < XCHAL_NUM_PERF_COUNTERS; ++i)
> +		pr_info("PM%d: 0x%08lx, PMCTRL%d: 0x%08lx, PMSTAT%d: 0x%08lx\n",
> +			i, get_er(XTENSA_PMU_PM(i)),
> +			i, get_er(XTENSA_PMU_PMCTRL(i)),
> +			i, get_er(XTENSA_PMU_PMSTAT(i)));
> +	local_irq_restore(flags);
> +}
> +
> +static irqreturn_t xtensa_pmu_irq_handler(int irq, void *dev_id)
> +{
> +	irqreturn_t rc = IRQ_NONE;
> +	struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events);
> +	unsigned i;
> +
> +	for (i = find_first_bit(ev->used_mask, XCHAL_NUM_PERF_COUNTERS);
> +	     i < XCHAL_NUM_PERF_COUNTERS;
> +	     i = find_next_bit(ev->used_mask, XCHAL_NUM_PERF_COUNTERS, i + 1)) {
> +		uint32_t v = get_er(XTENSA_PMU_PMSTAT(i));
> +		struct perf_event *event = ev->event[i];
> +		struct hw_perf_event *hwc = &event->hw;
> +		u64 last_period;
> +
> +		if (!(v & XTENSA_PMU_PMSTAT_OVFL))
> +			continue;
> +
> +		set_er(v, XTENSA_PMU_PMSTAT(i));
> +		xtensa_perf_event_update(event, hwc, i);
> +		last_period = hwc->last_period;
> +		if (xtensa_perf_event_set_period(event, hwc, i)) {
> +			struct perf_sample_data data;
> +			struct pt_regs *regs = get_irq_regs();
> +
> +			perf_sample_data_init(&data, 0, last_period);
> +			if (perf_event_overflow(event, &data, regs))
> +				xtensa_pmu_stop(event, 0);
> +		}
> +
> +		rc = IRQ_HANDLED;
> +	}
> +	return rc;
> +}
> +
> +static struct pmu xtensa_pmu = {
> +	.pmu_enable = xtensa_pmu_enable,
> +	.pmu_disable = xtensa_pmu_disable,
> +	.event_init = xtensa_pmu_event_init,
> +	.add = xtensa_pmu_add,
> +	.del = xtensa_pmu_del,
> +	.start = xtensa_pmu_start,
> +	.stop = xtensa_pmu_stop,
> +	.read = xtensa_pmu_read,
> +};
> +
> +static void xtensa_pmu_setup(void)
> +{
> +	unsigned i;
> +
> +	set_er(0, XTENSA_PMU_PMG);
> +	for (i = 0; i < XCHAL_NUM_PERF_COUNTERS; ++i) {
> +		set_er(0, XTENSA_PMU_PMCTRL(i));
> +		set_er(get_er(XTENSA_PMU_PMSTAT(i)), XTENSA_PMU_PMSTAT(i));
> +	}
> +}
> +
> +static int xtensa_pmu_notifier(struct notifier_block *self,
> +			       unsigned long action, void *data)
> +{
> +	switch (action & ~CPU_TASKS_FROZEN) {
> +	case CPU_STARTING:
> +		xtensa_pmu_setup();
> +		break;
> +
> +	default:
> +		break;
> +	}
> +
> +	return NOTIFY_OK;
> +}
> +
> +static int __init xtensa_pmu_init(void)
> +{
> +	int ret;
> +	int irq = irq_create_mapping(NULL, XCHAL_PROFILING_INTERRUPT);
> +
> +	perf_cpu_notifier(xtensa_pmu_notifier);
> +	ret = request_irq(irq, xtensa_pmu_irq_handler, IRQF_PERCPU,
> +			  "pmu", NULL);
> +	if (ret < 0)
> +		return ret;
> +
> +	ret = perf_pmu_register(&xtensa_pmu, "cpu", PERF_TYPE_RAW);
> +	if (ret)
> +		free_irq(irq, NULL);
> +
> +	return ret;
> +}
> +early_initcall(xtensa_pmu_init);
> -- 
> 1.8.1.4
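A closing, hedged note based on my reading of the PERF_TYPE_RAW branch in xtensa_pmu_event_init() above (the patch itself does not spell this out): the raw event config carries the PMCTRL 'select' field in bits 8..12 and the 'mask' field in bits 16..31, KRNLCNT and TRACELEVEL are passed through, and INTEN is forced on by the driver. A small user-space sketch of composing such a config word, reusing the select=2/mask=0xffff values that xtensa_hw_ctl[] uses for PERF_COUNT_HW_INSTRUCTIONS and the all-ones TRACELEVEL from XTENSA_PMU_MASK():

#include <stdint.h>
#include <stdio.h>

/* Field layout copied from the XTENSA_PMU_PMCTRL_* defines in the patch. */
#define PMCTRL_TRACELEVEL	0x000000f0
#define PMCTRL_SELECT_SHIFT	8
#define PMCTRL_MASK_SHIFT	16

static uint32_t raw_config(uint32_t select, uint32_t mask)
{
	return (select << PMCTRL_SELECT_SHIFT) |
	       (mask << PMCTRL_MASK_SHIFT) |
	       PMCTRL_TRACELEVEL;
}

int main(void)
{
	/* select=2, mask=0xffff: instruction counting, as in xtensa_hw_ctl[] */
	printf("0x%08x\n", raw_config(2, 0xffff));	/* prints 0xffff02f0 */
	return 0;
}

Such a value could presumably then be requested through perf's raw event syntax (e.g. "perf stat -e rffff02f0"), though I have not verified that on xtensa hardware.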

