New module to support the Cell PMU. This patch uses the same PMC and PMD tables as the earlier patches, and still uses the RTAS call for setting up the debug-bus signals. I still need to review the patches posted by Azuma-san a couple weeks ago to support systems without RTAS support. I hope to get to that in the next week or so.
This patch does not include support for the hardware-sampling feature in the Cell PMU. We are still testing that module, and should be able to submit it this month. Signed-off-by: Kevin Corry <[EMAIL PROTECTED]> Signed-off-by: Carl Love <[EMAIL PROTECTED]> Index: linux-2.6.22-arnd1-perfmon1/arch/powerpc/perfmon/Kconfig =================================================================== --- linux-2.6.22-arnd1-perfmon1.orig/arch/powerpc/perfmon/Kconfig +++ linux-2.6.22-arnd1-perfmon1/arch/powerpc/perfmon/Kconfig @@ -29,4 +29,13 @@ config PERFMON_PPC32 help Enables support for the PPC32 hardware performance counters If unsure, say M. + +config PERFMON_CELL + tristate "Support for Cell hardware performance counters" + depends on PERFMON && PPC_CELL + default n + help + Enables support for the Cell hardware performance counters. + If unsure, say M. + endmenu Index: linux-2.6.22-arnd1-perfmon1/arch/powerpc/perfmon/Makefile =================================================================== --- linux-2.6.22-arnd1-perfmon1.orig/arch/powerpc/perfmon/Makefile +++ linux-2.6.22-arnd1-perfmon1/arch/powerpc/perfmon/Makefile @@ -1,3 +1,4 @@ obj-$(CONFIG_PERFMON) += perfmon.o obj-$(CONFIG_PERFMON_POWER5) += perfmon_power5.o obj-$(CONFIG_PERFMON_PPC32) += perfmon_ppc32.o +obj-$(CONFIG_PERFMON_CELL) += perfmon_cell.o Index: linux-2.6.22-arnd1-perfmon1/arch/powerpc/perfmon/perfmon_cell.c =================================================================== --- /dev/null +++ linux-2.6.22-arnd1-perfmon1/arch/powerpc/perfmon/perfmon_cell.c @@ -0,0 +1,569 @@ +/* + * This file contains the Cell PMU register description tables + * and pmc checker used by perfmon.c. + * + * Copyright IBM Corporation 2007 + * + * Based on other Perfmon2 PMU modules. + * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P. 
+ * Contributed by Stephane Eranian <[EMAIL PROTECTED]>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ * 02111-1307 USA
+ */
+
+#include <linux/module.h>
+#include <linux/perfmon.h>
+#include <asm/cell-pmu.h>
+#include <asm/io.h>
+#include <asm/rtas.h>
+#include "../platforms/cell/cbe_regs.h"
+
+MODULE_AUTHOR("Kevin Corry <[EMAIL PROTECTED]>, "
+	      "Carl Love <[EMAIL PROTECTED]>");
+MODULE_DESCRIPTION("Cell PMU description table");
+MODULE_LICENSE("GPL");
+
+/*
+ * Mapping from Perfmon logical control registers to Cell hardware registers.
+ *
+ * Index layout of this table:
+ *    0 -  7: per-counter control registers (pm0_control .. pm7_control)
+ *    8 - 15: per-counter event registers holding the RTAS arguments
+ *            (pm0_event .. pm7_event)
+ *   16 - 23: global control registers (group_control .. pm_start_stop)
+ * The register-access code in this file decodes a PMC number by comparing
+ * it against these ranges, so the order of the entries must not change.
+ */
+static struct pfm_regmap_desc pfm_cell_pmc_desc[] = {
+	/* Per-counter control registers. */
+	PMC_D(PFM_REG_I, "pm0_control", 0, 0, 0, 0),
+	PMC_D(PFM_REG_I, "pm1_control", 0, 0, 0, 0),
+	PMC_D(PFM_REG_I, "pm2_control", 0, 0, 0, 0),
+	PMC_D(PFM_REG_I, "pm3_control", 0, 0, 0, 0),
+	PMC_D(PFM_REG_I, "pm4_control", 0, 0, 0, 0),
+	PMC_D(PFM_REG_I, "pm5_control", 0, 0, 0, 0),
+	PMC_D(PFM_REG_I, "pm6_control", 0, 0, 0, 0),
+	PMC_D(PFM_REG_I, "pm7_control", 0, 0, 0, 0),
+
+	/* Per-counter RTAS arguments. Each of these registers has three fields.
+	 *   bits 63-48: debug-bus word
+	 *   bits 47-32: sub-unit
+	 *   bits 31-0 : full signal number
+	 *   (MSB = 63, LSB = 0)
+	 */
+	PMC_D(PFM_REG_I, "pm0_event", 0, 0, 0, 0),
+	PMC_D(PFM_REG_I, "pm1_event", 0, 0, 0, 0),
+	PMC_D(PFM_REG_I, "pm2_event", 0, 0, 0, 0),
+	PMC_D(PFM_REG_I, "pm3_event", 0, 0, 0, 0),
+	PMC_D(PFM_REG_I, "pm4_event", 0, 0, 0, 0),
+	PMC_D(PFM_REG_I, "pm5_event", 0, 0, 0, 0),
+	PMC_D(PFM_REG_I, "pm6_event", 0, 0, 0, 0),
+	PMC_D(PFM_REG_I, "pm7_event", 0, 0, 0, 0),
+
+	/* Global control registers. Same order as enum pm_reg_name. */
+	PMC_D(PFM_REG_I, "group_control", 0, 0, 0, 0),
+	PMC_D(PFM_REG_I, "debug_bus_control", 0, 0, 0, 0),
+	PMC_D(PFM_REG_I, "trace_address", 0, 0, 0, 0),
+	PMC_D(PFM_REG_I, "ext_trace_timer", 0, 0, 0, 0),
+	PMC_D(PFM_REG_I, "pm_status", 0, 0, 0, 0),
+	PMC_D(PFM_REG_I, "pm_control", 0, 0, 0, 0),
+	PMC_D(PFM_REG_I, "pm_interval", 0, 0, 0, 0), /* FIX: Does user-space also need read access to this one? */
+	PMC_D(PFM_REG_I, "pm_start_stop", 0, 0, 0, 0),
+};
+#define PFM_PM_NUM_PMCS ARRAY_SIZE(pfm_cell_pmc_desc) /* number of logical PMCs (table entries) */
+
+#define CELL_PMC_PM_STATUS 20 /* index of "pm_status" in pfm_cell_pmc_desc[]: 8 control + 8 event + 4 preceding global entries */
+/*
+ * Mapping from Perfmon logical data counters to Cell hardware counters.
+ * One entry per counter, pm0 .. pm7, in counter order.
+ */
+static struct pfm_regmap_desc pfm_cell_pmd_desc[] = {
+	PMD_D(PFM_REG_C, "pm0", 0),
+	PMD_D(PFM_REG_C, "pm1", 0),
+	PMD_D(PFM_REG_C, "pm2", 0),
+	PMD_D(PFM_REG_C, "pm3", 0),
+	PMD_D(PFM_REG_C, "pm4", 0),
+	PMD_D(PFM_REG_C, "pm5", 0),
+	PMD_D(PFM_REG_C, "pm6", 0),
+	PMD_D(PFM_REG_C, "pm7", 0),
+};
+#define PFM_PM_NUM_PMDS ARRAY_SIZE(pfm_cell_pmd_desc) /* number of logical PMDs (table entries) */
+
+/* The firmware only sees physical CPUs, so divide by 2 if SMT is on.
*/
+#ifdef CONFIG_SCHED_SMT
+#define RTAS_CPU(cpu) ((cpu) / 2)
+#else
+#define RTAS_CPU(cpu) (cpu)
+#endif
+#define RTAS_BUS_WORD(x)      (u16)(((x) >> 48) & 0x0000ffff) /* bits 63-48 of a pmX_event value */
+#define RTAS_SUB_UNIT(x)      (u16)(((x) >> 32) & 0x0000ffff) /* bits 47-32 of a pmX_event value */
+#define RTAS_SIGNAL_NUMBER(x) (s32)( (x)        & 0xffffffff) /* bits 31-0 of a pmX_event value */
+
+#define subfunc_RESET    1 /* first input argument of the "ibm,cbe-perftools" call in rtas_reset_signals() */
+#define subfunc_ACTIVATE 2
+
+#define passthru_ENABLE  1
+#define passthru_DISABLE 2 /* second input argument of the "ibm,cbe-perftools" call in rtas_reset_signals() */
+
+/**
+ * struct cell_rtas_arg
+ *
+ * @cpu: Processor to modify. Linux numbers CPUs based on SMT IDs, but the
+ *       firmware only sees the physical CPUs. So this value should be the
+ *       SMT ID (from smp_processor_id() or get_cpu()) divided by 2.
+ * @sub_unit: Hardware subunit this applies to (if applicable).
+ * @signal_group: Signal group to enable/disable on the trace bus.
+ * @bus_word: For signal groups that propagate via the trace bus, this trace
+ *            bus word will be used. This is a mask of (1 << TraceBusWord).
+ *            For other signal groups, this specifies the trigger or event bus.
+ * @bit: Trigger/Event bit, if applicable for the signal group.
+ *
+ * The physical address and byte length of a buffer holding one or more of
+ * these structures are passed to rtas_call() to set up the signals on the
+ * debug bus.
+ **/
+struct cell_rtas_arg {
+	u16 cpu;
+	u16 sub_unit;
+	s16 signal_group;
+	u8 bus_word;
+	u8 bit;
+};
+
+/**
+ * rtas_reset_signals
+ *
+ * @cpu: Linux CPU number; converted to the firmware CPU number with
+ *       RTAS_CPU().
+ *
+ * Set up the RTAS arguments for a RESET command. The buffer is a single
+ * zeroed struct cell_rtas_arg on the stack with only the cpu field filled
+ * in. Its physical address is split into high and low 32-bit halves for
+ * the call.
+ *
+ * Returns the value returned by rtas_call().
+ **/
+static int rtas_reset_signals(u32 cpu)
+{
+	struct cell_rtas_arg signal;
+	u64 real_addr = virt_to_phys(&signal);
+	int rc;
+
+	memset(&signal, 0, sizeof(signal));
+	signal.cpu = RTAS_CPU(cpu);
+	rc = rtas_call(rtas_token("ibm,cbe-perftools"),
+		       5, 1, NULL,
+		       subfunc_RESET,
+		       passthru_DISABLE,
+		       real_addr >> 32, /* high 32 bits of the buffer address */
+		       real_addr & 0xffffffff, /* low 32 bits of the buffer address */
+		       sizeof(signal)); /* buffer length in bytes */
+
+	return rc;
+}
+
+/**
+ * rtas_activate_signals
+ *
+ * Set up the RTAS arguments for an ACTIVATE command.
The buffer will be the
+ * number of entries in the rtas_args[cpu].signal[] array that were filled
+ * in by attach_signal_to_counter().
+ **/
+static int rtas_activate_signals(struct cell_rtas_arg *signals,
+				 int num_signals)
+{
+	u64 real_addr = virt_to_phys(signals);
+	int rc;
+
+	rc = rtas_call(rtas_token("ibm,cbe-perftools"),
+		       5, 1, NULL,
+		       subfunc_ACTIVATE,
+		       passthru_ENABLE,
+		       real_addr >> 32, /* high 32 bits of the buffer address */
+		       real_addr & 0xffffffff, /* low 32 bits of the buffer address */
+		       num_signals * sizeof(*signals)); /* buffer length in bytes */
+
+	return rc;
+}
+
+/**
+ * write_pm07_event
+ *
+ * @cpu: Linux CPU number; converted to the firmware CPU number with
+ *       RTAS_CPU().
+ * @ctr: Counter index. Only used in the warning message.
+ * @value: 64-bit pmX_event register value holding the RTAS arguments.
+ *
+ * Pull out the RTAS arguments from the 64-bit register value and make the
+ * RTAS activate-signals call. The signal group is the signal number divided
+ * by 100 and the bit is the remainder. The bus word field is turned into a
+ * mask with (1 << bus word).
+ *
+ * A failure of the RTAS call is only logged with PFM_WARN(); nothing is
+ * returned to the caller.
+ *
+ * NOTE(review): the bus-word field is 16 bits wide but the resulting mask is
+ * stored in a u8, and the shift count is not range-checked - confirm that
+ * callers validate the value.
+ **/
+static void write_pm07_event(int cpu, unsigned int ctr, u64 value)
+{
+	struct cell_rtas_arg signal;
+	int rc;
+
+	signal.cpu = RTAS_CPU(cpu);
+	signal.bus_word = 1 << RTAS_BUS_WORD(value);
+	signal.sub_unit = RTAS_SUB_UNIT(value);
+	signal.signal_group = RTAS_SIGNAL_NUMBER(value) / 100;
+	signal.bit = RTAS_SIGNAL_NUMBER(value) % 100;
+
+	rc = rtas_activate_signals(&signal, 1);
+	if (rc) {
+		PFM_WARN("%s(%d, %u, %lu): Error calling "
+			 "rtas_activate_signal(): %d\n", __FUNCTION__,
+			 cpu, ctr, (unsigned long)value, rc);
+		/* FIX: Could we change this routine to return an error? */
+	}
+}
+
+/**
+ * pfm_cell_probe_pmu
+ *
+ * Simply check the processor version register to see if we're currently
+ * on a Cell system.
+ *
+ * Returns 0 when the version field of the PVR equals PV_BE, -1 otherwise.
+ **/
+static int pfm_cell_probe_pmu(void)
+{
+	unsigned long pvr = mfspr(SPRN_PVR);
+
+	if (PVR_VER(pvr) != PV_BE)
+		return -1;
+
+	return 0;
+}
+
+/**
+ * pfm_cell_write_pmc
+ *
+ * @cnum: Logical PMC number (index into pfm_cell_pmc_desc[]).
+ * @value: Value to write.
+ *
+ * Write one logical control register on the current CPU. The register is
+ * selected by range:
+ *  - cnum < NR_CTRS: per-counter control, via cbe_write_pm07_control().
+ *  - cnum < NR_CTRS * 2: per-counter event, via write_pm07_event(), which
+ *    makes an RTAS call.
+ *  - cnum == CELL_PMC_PM_STATUS: via cbe_enable_pm_interrupts().
+ *  - any other cnum < PFM_PM_NUM_PMCS: global register, via cbe_write_pm()
+ *    with the index rebased to cnum - (NR_CTRS * 2).
+ * Any larger cnum is silently ignored.
+ **/
+static void pfm_cell_write_pmc(unsigned int cnum, u64 value)
+{
+	int cpu = smp_processor_id();
+
+	if (cnum < NR_CTRS) {
+		cbe_write_pm07_control(cpu, cnum, value);
+
+	} else if (cnum < NR_CTRS * 2) {
+		write_pm07_event(cpu, cnum - NR_CTRS, value);
+
+	} else if (cnum == CELL_PMC_PM_STATUS) {
+		/* The pm_status register must be treated separately from
+		 * the other "global" PMCs. This call will ensure that
+		 * the interrupts are routed to the correct CPU, as well
+		 * as writing the desired value to the pm_status register.
+		 */
+		cbe_enable_pm_interrupts(cpu, cbe_get_hw_thread_id(cpu), value);
+
+	} else if (cnum < PFM_PM_NUM_PMCS) {
+		cbe_write_pm(cpu, cnum - (NR_CTRS * 2), value);
+	}
+}
+
+/**
+ * pfm_cell_write_pmd
+ *
+ * @cnum: Logical PMD (counter) number.
+ * @value: Value to write.
+ *
+ * Write a counter on the current CPU with cbe_write_ctr(). A cnum of
+ * NR_CTRS or more is silently ignored.
+ **/
+static void pfm_cell_write_pmd(unsigned int cnum, u64 value)
+{
+	int cpu = smp_processor_id();
+
+	if (cnum < NR_CTRS) {
+		cbe_write_ctr(cpu, cnum, value);
+	}
+}
+
+/**
+ * pfm_cell_read_pmd
+ *
+ * @cnum: Logical PMD (counter) number.
+ *
+ * Read a counter on the current CPU with cbe_read_ctr().
+ *
+ * NOTE(review): for a cnum of NR_CTRS or more this returns -EINVAL through
+ * a u64 return type, so the caller sees a large positive value rather than
+ * a negative error code - confirm that callers never pass such a cnum.
+ **/
+static u64 pfm_cell_read_pmd(unsigned int cnum)
+{
+	int cpu = smp_processor_id();
+
+	if (cnum < NR_CTRS) {
+		return cbe_read_ctr(cpu, cnum);
+	}
+
+	return -EINVAL;
+}
+
+/**
+ * pfm_cell_enable_counters
+ *
+ * Enable the performance monitor on the current CPU by calling
+ * cbe_enable_pm(). @ctx and @set are not used.
+ *
+ * NOTE(review): the original comment said this turns on the "global disable
+ * bit" in pm_control; presumably the global enable bit is meant - confirm
+ * against cbe_enable_pm().
+ **/
+static void pfm_cell_enable_counters(struct pfm_context *ctx,
+				     struct pfm_event_set *set)
+{
+	cbe_enable_pm(smp_processor_id());
+}
+
+/**
+ * pfm_cell_disable_counters
+ *
+ * Disable the performance monitor on the current CPU by calling
+ * cbe_disable_pm(). @ctx and @set are not used.
+ *
+ * NOTE(review): the original comment said this turns off the "global disable
+ * bit" in pm_control; presumably the global enable bit is meant - confirm
+ * against cbe_disable_pm().
+ **/
+static void pfm_cell_disable_counters(struct pfm_context *ctx,
+				      struct pfm_event_set *set)
+{
+	cbe_disable_pm(smp_processor_id());
+}
+
+/**
+ * pfm_cell_restore_pmcs
+ *
+ * @set: Event set whose saved PMC values (set->pmcs[]) are written.
+ *
+ * Write all control register values that are saved in the specified event
+ * set. We could use the pfm_arch_write_pmc() function to restore each PMC
+ * individually (as is done in other architectures), but that results in
+ * multiple RTAS calls. As an optimization, we will setup the RTAS argument
+ * array so we can do all event-control registers in one RTAS call.
+ *
+ * A failure of the RTAS call is only logged with PFM_WARN().
+ *
+ * NOTE(review): rtas_activate_signals() is called even when no event
+ * register is in use (num_used == 0), i.e. with a zero-length buffer -
+ * confirm that the firmware accepts this.
+ **/
+void pfm_cell_restore_pmcs(struct pfm_event_set *set)
+{
+	struct cell_rtas_arg signals[NR_CTRS];
+	u64 value, *used_pmcs = set->used_pmcs;
+	int i, rc, num_used = 0, cpu = smp_processor_id();
+
+	memset(signals, 0, sizeof(signals));
+
+	for (i = 0; i < NR_CTRS; i++) {
+		/* Write the per-counter control register. If the PMC is not
+		 * in use, then it will simply clear the register, which will
+		 * disable the associated counter.
+		 */
+		cbe_write_pm07_control(cpu, i, set->pmcs[i]);
+
+		if (test_bit(i + NR_CTRS, used_pmcs)) {
+			/* Set up the next RTAS array entry for this counter.
+			 * Only include pm07_event registers that are in use
+			 * by this set so the RTAS call doesn't have to
+			 * process blank array entries.
+			 */
+			value = set->pmcs[i + NR_CTRS];
+			signals[num_used].cpu = RTAS_CPU(cpu);
+			signals[num_used].sub_unit = RTAS_SUB_UNIT(value);
+			signals[num_used].bus_word = 1 << RTAS_BUS_WORD(value);
+			signals[num_used].bit = RTAS_SIGNAL_NUMBER(value) % 100;
+			signals[num_used].signal_group =
+				RTAS_SIGNAL_NUMBER(value) / 100;
+			num_used++;
+		}
+	}
+
+	rc = rtas_activate_signals(signals, num_used);
+	if (rc) {
+		PFM_WARN("Error calling rtas_activate_signal(): %d\n", rc);
+		/* FIX: We will also need this routine to be able to return
+		 * an error if Stephane agrees to change pfm_arch_write_pmc
+		 * to return an error.
+		 */
+	}
+
+	/* Write all the global PMCs. Need to call pfm_cell_write_pmc()
+	 * instead of cbe_write_pm() due to special handling for the
+	 * pm_status register.
+	 *
+	 * i equals NR_CTRS when the loop above finishes, so "i *= 2" starts
+	 * this loop at NR_CTRS * 2, the index of the first global PMC.
+	 */
+	for (i *= 2; i < PFM_PM_NUM_PMCS; i++)
+		pfm_cell_write_pmc(i, set->pmcs[i]);
+}
+
+/**
+ * pfm_cell_unload_context
+ *
+ * For system-wide contexts and self-monitored contexts, make the RTAS call
+ * to reset the debug-bus signals.
+ *
+ * For non-self-monitored contexts, the monitored thread will already have
+ * been taken off the CPU and we don't need to do anything additional.
+ **/
+static int pfm_cell_unload_context(struct pfm_context *ctx,
+				   struct task_struct *task)
+{
+	if (task == current || ctx->flags.system) {
+		rtas_reset_signals(smp_processor_id()); /* return code is ignored */
+	}
+	return 0; /* always reports success */
+}
+
+/**
+ * pfm_cell_ctxswout_thread
+ *
+ * When a monitored thread is switched out (self-monitored or externally
+ * monitored) we need to reset the debug-bus signals so the next context that
+ * gets switched in can start from a clean set of signals.
+ *
+ * @task, @ctx and @set are not used. The return code of the RTAS reset call
+ * is ignored and the function always returns 0.
+ **/
+int pfm_cell_ctxswout_thread(struct task_struct *task,
+			     struct pfm_context *ctx, struct pfm_event_set *set)
+{
+	rtas_reset_signals(smp_processor_id());
+	return 0;
+}
+
+/**
+ * pfm_cell_get_ovfl_pmds
+ *
+ * Determine which counters in this set have overflowed and fill in the
+ * set->povfl_pmds mask and set->npend_ovfls count. On Cell, the pm_status
+ * register contains a bit for each counter to indicate overflow. However,
+ * those 8 bits are in the reverse order than what Perfmon2 is expecting,
+ * so we need to reverse the order of the overflow bits.
+ *
+ * The status value comes from ctx_arch->last_read_pm_status. If the
+ * interrupt handler did not just update it (last_read_updated is clear),
+ * the register is read and cleared here first.
+ **/
+static void pfm_cell_get_ovfl_pmds(struct pfm_context *ctx,
+				   struct pfm_event_set *set)
+{
+	struct pfm_arch_context *ctx_arch = pfm_ctx_arch(ctx);
+	u32 pm_status, ovfl_ctrs;
+	u64 povfl_pmds = 0;
+	int i;
+
+	if (!ctx_arch->last_read_updated)
+		/* This routine was not called via the interrupt handler.
+		 * Need to start by getting interrupts and updating
+		 * last_read_pm_status.
+		 */
+		ctx_arch->last_read_pm_status =
+			cbe_get_and_clear_pm_interrupts(smp_processor_id());
+
+	/* Reset the flag that the interrupt handler last read pm_status. */
+	ctx_arch->last_read_updated = 0;
+
+	pm_status = ctx_arch->last_read_pm_status &
+		    set->pmcs[CELL_PMC_PM_STATUS]; /* keep only bits also set in this set's saved pm_status value */
+	ovfl_ctrs = CBE_PM_OVERFLOW_CTRS(pm_status);
+
+	/* Reverse the order of the bits in ovfl_ctrs
+	 * and store the result in povfl_pmds.
+	 */
+	for (i = 0; i < PFM_PM_NUM_PMDS; i++) {
+		povfl_pmds = (povfl_pmds << 1) | (ovfl_ctrs & 1);
+		ovfl_ctrs >>= 1;
+	}
+
+	/* Mask povfl_pmds with set->used_pmds to get set->povfl_pmds.
+	 * Count the bits set in set->povfl_pmds to get set->npend_ovfls.
+	 *
+	 * NOTE(review): &povfl_pmds is a pointer to a u64 handed to the
+	 * bitmap helpers; presumably this relies on unsigned long being
+	 * 64 bits wide on this platform - confirm.
+	 */
+	bitmap_and(set->povfl_pmds, &povfl_pmds,
+		   set->used_pmds, PFM_PM_NUM_PMDS);
+	set->npend_ovfls = bitmap_weight(set->povfl_pmds, PFM_PM_NUM_PMDS);
+}
+
+/**
+ * handle_trace_buffer_interrupts
+ *
+ * This routine is for processing just the interval timer and trace buffer
+ * overflow interrupts. Performance counter interrupts are handled by the
+ * perf_irq_handler() routine, which reads and saves the pm_status register.
+ * This routine should not read the actual pm_status register, but rather
+ * the value passed in.
+ *
+ * The routine is currently a stub: it returns without using any of its
+ * arguments.
+ *
+ * FIX: We don't necessarily need all these parameters.
+ **/
+static void handle_trace_buffer_interrupts(unsigned long iip,
+					   struct pt_regs *regs,
+					   struct pfm_context *ctx,
+					   struct pfm_arch_context *ctx_arch,
+					   u32 pm_status)
+{
+	/* FIX: Currently ignoring trace-buffer interrupts. */
+	return;
+}
+
+/**
+ * pfm_cell_irq_handler
+ *
+ * Handler for all Cell performance-monitor interrupts.
+ *
+ * Sequence: disable the performance monitor, read and clear pm_status,
+ * dispatch to the counter-overflow and/or trace-buffer handlers, rewrite
+ * the saved interrupt settings, then re-enable the performance monitor.
+ **/
+static void pfm_cell_irq_handler(struct pt_regs *regs, struct pfm_context *ctx)
+{
+	struct pfm_arch_context *ctx_arch = pfm_ctx_arch(ctx);
+	u32 last_read_pm_status;
+	int cpu = smp_processor_id();
+
+	/* Need to disable and reenable the performance counters to get the
+	 * desired behavior from the hardware. This is specific to the Cell
+	 * PMU hardware.
+	 */
+	cbe_disable_pm(cpu);
+
+	/* Read the pm_status register to get the interrupt bits. If a
+	 * performance counter interrupt occurred, call the core perfmon
+	 * interrupt handler to service the counter overflow. If the
+	 * interrupt was for the interval timer or the trace_buffer,
+	 * call the interval timer and trace buffer interrupt handler.
+	 *
+	 * The value read from the pm_status register is stored in the
+	 * pfm_arch_context structure for use by other routines. Note that
+	 * reading the pm_status register resets the interrupt flags to zero.
+	 * Hence, it is important that the register is only read in one place.
+	 *
+	 * The pm_status reg interrupt reg format is:
+	 * [pmd0:pmd1:pmd2:pmd3:pmd4:pmd5:pmd6:pmd7:intt:tbf:tbu:]
+	 * - pmd0 to pmd7 are the perf counter overflow interrupts.
+	 * - intt is the interval timer overflowed interrupt.
+	 * - tbf is the trace buffer full interrupt.
+	 * - tbu is the trace buffer underflow interrupt.
+	 * - The pmd0 bit is the MSB of the 32 bit register.
+	 */
+	ctx_arch->last_read_pm_status = last_read_pm_status =
+		cbe_get_and_clear_pm_interrupts(cpu);
+
+	/* Set flag for pfm_cell_get_ovfl_pmds() routine so it knows
+	 * last_read_pm_status was updated by the interrupt handler.
+	 */
+	ctx_arch->last_read_updated = 1;
+
+	if (last_read_pm_status & 0xFF000000)
+		/* At least one counter overflowed (top 8 bits: pmd0-pmd7). */
+		pfm_interrupt_handler(instruction_pointer(regs), regs);
+
+	if (last_read_pm_status & 0x00E00000)
+		/* Trace buffer or interval timer overflow (next 3 bits:
+		 * intt, tbf, tbu).
+		 */
+		handle_trace_buffer_interrupts(instruction_pointer(regs),
+					       regs, ctx, ctx_arch,
+					       last_read_pm_status);
+
+	/* The interrupt setting is the value written to the pm_status
+	 * register. It is saved in the context when the register is
+	 * written.
+	 */
+	cbe_enable_pm_interrupts(cpu, cbe_get_hw_thread_id(cpu),
+				 ctx->active_set->pmcs[CELL_PMC_PM_STATUS]);
+
+	/* The writes to the various performance counters only write to a
+	 * latch. The new values (interrupt setting bits, reset counter value
+	 * etc.) are not copied to the actual registers until the performance
+	 * monitor is enabled. In order to get this to work as desired, the
+	 * performance monitor needs to be disabled while writing to the
+	 * latches. This is a HW design issue.
+	 */
+	cbe_enable_pm(cpu);
+}
+
+static struct pfm_arch_pmu_info pfm_cell_pmu_info = { /* Cell callbacks, referenced from pfm_cell_pmu_conf.arch_info */
+	.pmu_style = PFM_POWERPC_PMU_CELL,
+	.write_pmc = pfm_cell_write_pmc,
+	.write_pmd = pfm_cell_write_pmd,
+	.read_pmd = pfm_cell_read_pmd,
+	.enable_counters = pfm_cell_enable_counters,
+	.disable_counters = pfm_cell_disable_counters,
+	.irq_handler = pfm_cell_irq_handler,
+	.get_ovfl_pmds = pfm_cell_get_ovfl_pmds,
+	.restore_pmcs = pfm_cell_restore_pmcs,
+	.ctxswout_thread = pfm_cell_ctxswout_thread,
+	.unload_context = pfm_cell_unload_context,
+};
+
+static struct pfm_pmu_config pfm_cell_pmu_conf = { /* PMU description passed to pfm_pmu_register() */
+	.pmu_name = "Cell",
+	.version = "0.1",
+	.counter_width = 32,
+	.pmd_desc = pfm_cell_pmd_desc,
+	.pmc_desc = pfm_cell_pmc_desc,
+	.num_pmc_entries = PFM_PM_NUM_PMCS,
+	.num_pmd_entries = PFM_PM_NUM_PMDS,
+	.probe_pmu = pfm_cell_probe_pmu,
+	.arch_info = &pfm_cell_pmu_info,
+	.flags = PFM_PMU_BUILTIN_FLAG,
+	.owner = THIS_MODULE,
+};
+
+static int __init pfm_cell_pmu_init_module(void) /* module entry point: register the Cell PMU description */
+{
+	return pfm_pmu_register(&pfm_cell_pmu_conf);
+}
+
+static void __exit pfm_cell_pmu_cleanup_module(void) /* module exit point: unregister the Cell PMU description */
+{
+	pfm_pmu_unregister(&pfm_cell_pmu_conf);
+}
+
+module_init(pfm_cell_pmu_init_module);
+module_exit(pfm_cell_pmu_cleanup_module);
_______________________________________________
perfmon mailing list
perfmon@linux.hpl.hp.com
http://www.hpl.hp.com/hosted/linux/mail-archives/perfmon/