From: Anju T Sudhakar <a...@linux.vnet.ibm.com> The device tree IMC driver code parses the IMC units and their events. It passes this information to the IMC PMU code, which is placed in powerpc/perf as "imc-pmu.c".
Add a set of generic IMC PMU related event functions to be used by each IMC PMU unit. Add code to set up the format attribute and to register IMC PMUs. Add an event_init function for nest_imc events. Since the IMC counters' data are periodically fed to a memory location, the functions to read/update, start/stop, add/del can be generic and can be used by all IMC PMU units. Signed-off-by: Anju T Sudhakar <a...@linux.vnet.ibm.com> Signed-off-by: Hemant Kumar <hem...@linux.vnet.ibm.com> Signed-off-by: Madhavan Srinivasan <ma...@linux.vnet.ibm.com> --- arch/powerpc/include/asm/imc-pmu.h | 2 + arch/powerpc/perf/Makefile | 3 + arch/powerpc/perf/imc-pmu.c | 268 ++++++++++++++++++++++++++++++ arch/powerpc/platforms/powernv/opal-imc.c | 10 +- 4 files changed, 282 insertions(+), 1 deletion(-) create mode 100644 arch/powerpc/perf/imc-pmu.c diff --git a/arch/powerpc/include/asm/imc-pmu.h b/arch/powerpc/include/asm/imc-pmu.h index ffaea0b9c13e..865cd756bfb6 100644 --- a/arch/powerpc/include/asm/imc-pmu.h +++ b/arch/powerpc/include/asm/imc-pmu.h @@ -96,4 +96,6 @@ struct imc_pmu { */ #define IMC_DOMAIN_NEST 1 +extern struct imc_pmu *per_nest_pmu_arr[IMC_MAX_PMUS]; +extern int __init init_imc_pmu(struct imc_events *events, int idx, struct imc_pmu *pmu_ptr); #endif /* PPC_POWERNV_IMC_PMU_DEF_H */ diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile index 4d606b99a5cb..b29d918814d3 100644 --- a/arch/powerpc/perf/Makefile +++ b/arch/powerpc/perf/Makefile @@ -6,6 +6,9 @@ obj-$(CONFIG_PPC_PERF_CTRS) += core-book3s.o bhrb.o obj64-$(CONFIG_PPC_PERF_CTRS) += power4-pmu.o ppc970-pmu.o power5-pmu.o \ power5+-pmu.o power6-pmu.o power7-pmu.o \ isa207-common.o power8-pmu.o power9-pmu.o + +obj-$(CONFIG_HV_PERF_IMC_CTRS) += imc-pmu.o + obj32-$(CONFIG_PPC_PERF_CTRS) += mpc7450-pmu.o obj-$(CONFIG_FSL_EMB_PERF_EVENT) += core-fsl-emb.o diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c new file mode 100644 index 000000000000..e410788af10f --- /dev/null +++ 
b/arch/powerpc/perf/imc-pmu.c @@ -0,0 +1,268 @@ +/* + * Nest Performance Monitor counter support. + * + * Copyright (C) 2017 Madhavan Srinivasan, IBM Corporation. + * (C) 2017 Anju T Sudhakar, IBM Corporation. + * (C) 2017 Hemant K Shaw, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or later version. + */ +#include <linux/perf_event.h> +#include <linux/slab.h> +#include <asm/opal.h> +#include <asm/imc-pmu.h> +#include <asm/cputhreads.h> +#include <asm/smp.h> +#include <linux/string.h> + +/* Needed for sanity check */ +extern u64 nest_max_offset; +struct imc_pmu *per_nest_pmu_arr[IMC_MAX_PMUS]; + +struct imc_pmu *imc_event_to_pmu(struct perf_event *event) +{ + return container_of(event->pmu, struct imc_pmu, pmu); +} + +PMU_FORMAT_ATTR(event, "config:0-20"); +static struct attribute *imc_format_attrs[] = { + &format_attr_event.attr, + NULL, +}; + +static struct attribute_group imc_format_group = { + .name = "format", + .attrs = imc_format_attrs, +}; + +static int nest_imc_event_init(struct perf_event *event) +{ + int chip_id; + u32 config = event->attr.config; + struct imc_mem_info *pcni; + struct imc_pmu *pmu; + bool flag = false; + + if (event->attr.type != event->pmu->type) + return -ENOENT; + + /* Sampling not supported */ + if (event->hw.sample_period) + return -EINVAL; + + /* unsupported modes and filters */ + if (event->attr.exclude_user || + event->attr.exclude_kernel || + event->attr.exclude_hv || + event->attr.exclude_idle || + event->attr.exclude_host || + event->attr.exclude_guest) + return -EINVAL; + + if (event->cpu < 0) + return -EINVAL; + + /* Sanity check for config (event offset) */ + if (config > nest_max_offset) + return -EINVAL; + + chip_id = topology_physical_package_id(event->cpu); + pmu = imc_event_to_pmu(event); + pcni = pmu->mem_info; + do { + if 
(pcni->id == chip_id) { + flag = true; + break; + } + pcni++; + }while(pcni); + if (!flag) + return -ENODEV; + /* + * Memory for Nest HW counter data could be in multiple pages. + * Hence check and pick the right event base page for chip with + * "chip_id" and add "config" to it". + */ + event->hw.event_base = (u64)pcni->vbase[config/PAGE_SIZE] + (config & ~PAGE_MASK); + return 0; +} + +static void imc_read_counter(struct perf_event *event) +{ + u64 *addr, data; + + /* + * In-Memory Collection (IMC) counters are free flowing counters. + * So we take a snapshot of the counter value on enable and save it + * to calculate the delta at later stage to present the event counter + * value. + */ + addr = (u64 *)event->hw.event_base; + data = __be64_to_cpu(READ_ONCE(*addr)); + local64_set(&event->hw.prev_count, data); +} + +static void imc_perf_event_update(struct perf_event *event) +{ + u64 counter_prev, counter_new, final_count, *addr; + + addr = (u64 *)event->hw.event_base; + counter_prev = local64_read(&event->hw.prev_count); + counter_new = __be64_to_cpu(READ_ONCE(*addr)); + final_count = counter_new - counter_prev; + + /* + * Need to update prev_count is that, counter could be + * read in a periodic interval from the tool side. + */ + local64_set(&event->hw.prev_count, counter_new); + /* Update the delta to the event count */ + local64_add(final_count, &event->count); +} + +static void imc_event_start(struct perf_event *event, int flags) +{ + /* + * In Memory Counters are free flowing counters. HW or the microcode + * keeps adding to the counter offset in memory. To get event + * counter value, we snapshot the value here and we calculate + * delta at later point. + */ + imc_read_counter(event); +} + +static void imc_event_stop(struct perf_event *event, int flags) +{ + /* + * Take a snapshot and calculate the delta and update + * the event counter values. 
+ */ + imc_perf_event_update(event); +} + +static int imc_event_add(struct perf_event *event, int flags) +{ + if (flags & PERF_EF_START) + imc_event_start(event, flags); + + return 0; +} + +/* update_pmu_ops : Populate the appropriate operations for "pmu" */ +static int update_pmu_ops(struct imc_pmu *pmu) +{ + if (!pmu) + return -EINVAL; + + pmu->pmu.task_ctx_nr = perf_invalid_context; + pmu->pmu.event_init = nest_imc_event_init; + pmu->pmu.add = imc_event_add; + pmu->pmu.del = imc_event_stop; + pmu->pmu.start = imc_event_start; + pmu->pmu.stop = imc_event_stop; + pmu->pmu.read = imc_perf_event_update; + pmu->attr_groups[IMC_FORMAT_ATTR] = &imc_format_group; + pmu->pmu.attr_groups = pmu->attr_groups; + + return 0; +} + +/* dev_str_attr : Populate event "name" and string "str" in attribute */ +static struct attribute *dev_str_attr(const char *name, const char *str) +{ + struct perf_pmu_events_attr *attr; + + attr = kzalloc(sizeof(*attr), GFP_KERNEL); + if (!attr) + return NULL; + sysfs_attr_init(&attr->attr.attr); + + attr->event_str = str; + attr->attr.attr.name = name; + attr->attr.attr.mode = 0444; + attr->attr.show = perf_event_sysfs_show; + + return &attr->attr.attr; +} + +/* + * update_events_in_group: Update the "events" information in an attr_group + * and assign the attr_group to the pmu "pmu". 
+ */ +static int update_events_in_group(struct imc_events *events, + int idx, struct imc_pmu *pmu) +{ + struct attribute_group *attr_group; + struct attribute **attrs; + int i; + + /* If there is no events for this pmu, just return zero */ + if (!events) + return 0; + + /* Allocate memory for attribute group */ + attr_group = kzalloc(sizeof(*attr_group), GFP_KERNEL); + if (!attr_group) + return -ENOMEM; + + /* Allocate memory for attributes */ + attrs = kzalloc((sizeof(struct attribute *) * (idx + 1)), GFP_KERNEL); + if (!attrs) { + kfree(attr_group); + return -ENOMEM; + } + + attr_group->name = "events"; + attr_group->attrs = attrs; + for (i = 0; i < idx; i++, events++) { + attrs[i] = dev_str_attr((char *)events->ev_name, + (char *)events->ev_value); + } + + /* Save the event attribute */ + pmu->attr_groups[IMC_EVENT_ATTR] = attr_group; + return 0; +} + +/* + * init_imc_pmu : Setup and register the IMC pmu device. + * + * @events: events memory for this pmu. + * @idx: number of event entries created. + * @pmu_ptr: memory allocated for this pmu. 
+ */ +int __init init_imc_pmu(struct imc_events *events, int idx, + struct imc_pmu *pmu_ptr) +{ + int ret = -ENODEV; + + ret = update_events_in_group(events, idx, pmu_ptr); + if (ret) + goto err_free; + + ret = update_pmu_ops(pmu_ptr); + if (ret) + goto err_free; + + ret = perf_pmu_register(&pmu_ptr->pmu, pmu_ptr->pmu.name, -1); + if (ret) + goto err_free; + + pr_info("%s performance monitor hardware support registered\n", + pmu_ptr->pmu.name); + + return 0; + +err_free: + /* Only free the attr_groups which are dynamically allocated */ + if (pmu_ptr->attr_groups[IMC_EVENT_ATTR]) { + if (pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs) + kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs); + kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]); + } + + return ret; +} diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c index 6a38f7c9ea66..c2a6019536a1 100644 --- a/arch/powerpc/platforms/powernv/opal-imc.c +++ b/arch/powerpc/platforms/powernv/opal-imc.c @@ -35,7 +35,6 @@ #include <asm/imc-pmu.h> u64 nest_max_offset; -struct imc_pmu *per_nest_pmu_arr[IMC_MAX_PMUS]; static int imc_event_prop_update(char *name, struct imc_events *events) { @@ -471,8 +470,17 @@ static int imc_pmu_create(struct device_node *parent, int pmu_index, int domain) if (prop) imc_events_setup(parent, pmu_index, pmu_ptr, prop, &idx); } + /* Function to register IMC pmu */ + ret = init_imc_pmu(pmu_ptr->events, idx, pmu_ptr); + if (ret) { + pr_err("IMC PMU %s Register failed\n", pmu_ptr->pmu.name); + goto free_events; + } return 0; +free_events: + if (pmu_ptr->events) + imc_free_events(pmu_ptr->events, idx); free_pmu: if (pmu_ptr) kfree(pmu_ptr); -- 2.7.4