The device tree IMC driver code parses the IMC units and their events,
and passes that information to the IMC PMU code, which lives in
powerpc/perf as "imc-pmu.c".

This patch adds a set of generic IMC PMU event functions to be used by
each IMC PMU unit. It also adds code to set up the format attributes and
to register IMC PMUs, and adds an event_init function for nest_imc events.

Since the IMC counters' data is periodically written to a memory location,
the functions to read/update, start/stop and add/del can be generic and
shared by all IMC PMU units.

Signed-off-by: Anju T Sudhakar <a...@linux.vnet.ibm.com>
Signed-off-by: Hemant Kumar <hem...@linux.vnet.ibm.com>
Signed-off-by: Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/imc-pmu.h        |   5 +
 arch/powerpc/perf/Makefile                |   3 +
 arch/powerpc/perf/imc-pmu.c               | 279 ++++++++++++++++++++++++++++++
 arch/powerpc/platforms/powernv/opal-imc.c |  11 +-
 4 files changed, 296 insertions(+), 2 deletions(-)
 create mode 100644 arch/powerpc/perf/imc-pmu.c

diff --git a/arch/powerpc/include/asm/imc-pmu.h 
b/arch/powerpc/include/asm/imc-pmu.h
index 2a0239e..25d0c57 100644
--- a/arch/powerpc/include/asm/imc-pmu.h
+++ b/arch/powerpc/include/asm/imc-pmu.h
@@ -63,6 +63,9 @@ struct imc_events {
 #define IMC_CPUMASK_ATTR       1
 #define IMC_EVENT_ATTR         2
 #define IMC_NULL_ATTR          3
+#define IMC_EVENT_OFFSET_MASK  0xffffffffULL
+#define IMC_EVENT_RVALUE_MASK  0x100000000ULL
+#define IMC_NEST_EVENT_MODE    0x1fe00000000ULL
 
 /*
  * Device tree parser code detects IMC pmu support and
@@ -101,4 +104,6 @@ enum {
  */
 #define IMC_DOMAIN_NEST                1
 
+extern struct imc_pmu *per_nest_pmu_arr[IMC_MAX_PMUS];
+extern int init_imc_pmu(struct imc_events *events, int idx, struct imc_pmu 
*pmu_ptr);
 #endif /* PPC_POWERNV_IMC_PMU_DEF_H */
diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile
index 4d606b9..b29d918 100644
--- a/arch/powerpc/perf/Makefile
+++ b/arch/powerpc/perf/Makefile
@@ -6,6 +6,9 @@ obj-$(CONFIG_PPC_PERF_CTRS)     += core-book3s.o bhrb.o
 obj64-$(CONFIG_PPC_PERF_CTRS)  += power4-pmu.o ppc970-pmu.o power5-pmu.o \
                                   power5+-pmu.o power6-pmu.o power7-pmu.o \
                                   isa207-common.o power8-pmu.o power9-pmu.o
+
+obj-$(CONFIG_HV_PERF_IMC_CTRS) += imc-pmu.o
+
 obj32-$(CONFIG_PPC_PERF_CTRS)  += mpc7450-pmu.o
 
 obj-$(CONFIG_FSL_EMB_PERF_EVENT) += core-fsl-emb.o
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
new file mode 100644
index 0000000..326c9ea
--- /dev/null
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -0,0 +1,279 @@
+/*
+ * Nest Performance Monitor counter support.
+ *
+ * Copyright (C) 2017 Madhavan Srinivasan, IBM Corporation.
+ *           (C) 2017 Anju T Sudhakar, IBM Corporation.
+ *           (C) 2017 Hemant K Shaw, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or later version.
+ */
+#include <linux/perf_event.h>
+#include <linux/slab.h>
+#include <asm/opal.h>
+#include <asm/imc-pmu.h>
+#include <asm/cputhreads.h>
+#include <asm/smp.h>
+#include <linux/string.h>
+
+/*
+ * Array of nest IMC pmu pointers, IMC_MAX_PMUS entries long. It is declared
+ * extern in asm/imc-pmu.h so code outside this file can reach it.
+ * Needed for sanity check.
+ */
+struct imc_pmu *per_nest_pmu_arr[IMC_MAX_PMUS];
+
+/* imc_event_to_pmu : Return the imc_pmu that embeds the event's struct pmu */
+struct imc_pmu *imc_event_to_pmu(struct perf_event *event)
+{
+       struct pmu *base = event->pmu;
+
+       return container_of(base, struct imc_pmu, pmu);
+}
+
+/*
+ * Fields of the event "config" value, exposed through the "format"
+ * attribute group below:
+ *   event  - config bits 0-47
+ *   offset - config bits 0-31 (same bits as IMC_EVENT_OFFSET_MASK)
+ *   rvalue - config bit 32    (same bit as IMC_EVENT_RVALUE_MASK)
+ *   mode   - config bits 33-40 (same bits as IMC_NEST_EVENT_MODE)
+ */
+PMU_FORMAT_ATTR(event, "config:0-47");
+PMU_FORMAT_ATTR(offset, "config:0-31");
+PMU_FORMAT_ATTR(rvalue, "config:32");
+PMU_FORMAT_ATTR(mode, "config:33-40");
+/* NULL-terminated list of the format attributes defined above */
+static struct attribute *nest_imc_format_attrs[] = {
+       &format_attr_event.attr,
+       &format_attr_offset.attr,
+       &format_attr_rvalue.attr,
+       &format_attr_mode.attr,
+       NULL,
+};
+
+/* "format" group; update_pmu_ops() stores it in attr_groups[IMC_FORMAT_ATTR] */
+static struct attribute_group imc_format_group = {
+       .name = "format",
+       .attrs = nest_imc_format_attrs,
+};
+
+/*
+ * nest_imc_event_init : perf event_init callback for nest IMC pmus.
+ *
+ * Validates the event attributes and config, then points
+ * event->hw.event_base at the counter for the chip that owns event->cpu.
+ *
+ * Returns 0 on success, -ENOENT if the event does not belong to this pmu,
+ * -EINVAL for unsupported attributes or an invalid config, and -ENODEV if
+ * no counter memory is found for the chip.
+ */
+static int nest_imc_event_init(struct perf_event *event)
+{
+       int chip_id;
+       u32 l_config;
+       /*
+        * Keep all 64 bits of the config: the rvalue and mode fields sit
+        * above bit 31 and cannot be checked through a 32-bit copy.
+        */
+       u64 config = event->attr.config;
+       struct imc_mem_info *pcni;
+       struct imc_pmu *pmu;
+       bool flag = false;
+
+       if (event->attr.type != event->pmu->type)
+               return -ENOENT;
+
+       /* Sampling not supported */
+       if (event->hw.sample_period)
+               return -EINVAL;
+
+       /* unsupported modes and filters */
+       if (event->attr.exclude_user   ||
+           event->attr.exclude_kernel ||
+           event->attr.exclude_hv     ||
+           event->attr.exclude_idle   ||
+           event->attr.exclude_host   ||
+           event->attr.exclude_guest)
+               return -EINVAL;
+
+       if (event->cpu < 0)
+               return -EINVAL;
+
+       pmu = imc_event_to_pmu(event);
+       /*
+        * Sanity check for config (event offset, mode and rvalue).
+        * mode and rvalue should be zero, if not just return.
+        */
+       if (((config & IMC_EVENT_OFFSET_MASK) > pmu->counter_mem_size) ||
+           ((config & IMC_EVENT_RVALUE_MASK) != 0) ||
+           ((config & IMC_NEST_EVENT_MODE) != 0))
+               return -EINVAL;
+
+       chip_id = topology_physical_package_id(event->cpu);
+       pcni = pmu->mem_info;
+       /*
+        * FIXME(review): "pcni" is advanced with ++ and so never becomes
+        * NULL. This loop only ends through the break, which makes the
+        * -ENODEV path below unreachable and lets the walk run past the end
+        * of mem_info when no entry matches. It needs a real bound (entry
+        * count or sentinel), which is not visible from this file.
+        */
+       do {
+               if (pcni->id == chip_id) {
+                       flag = true;
+                       break;
+               }
+               pcni++;
+       } while (pcni);
+       if (!flag)
+               return -ENODEV;
+       /*
+        * Memory for Nest HW counter data could be in multiple pages.
+        * Hence check and pick the right event base page for chip with
+        * "chip_id" and add the in-page part of the offset to it.
+        */
+       l_config = config & IMC_EVENT_OFFSET_MASK;
+       event->hw.event_base = (u64)pcni->vbase[l_config / PAGE_SIZE] +
+                              (l_config & ~PAGE_MASK);
+       return 0;
+}
+
+/*
+ * imc_read_counter : Record the current counter value as the baseline.
+ *
+ * In-Memory Collection (IMC) counters are free flowing, so the value seen
+ * now is stored in hw.prev_count and the delta is worked out later when
+ * the event count is presented.
+ */
+static void imc_read_counter(struct perf_event *event)
+{
+       u64 *counter = (u64 *)event->hw.event_base;
+       u64 snapshot = __be64_to_cpu(READ_ONCE(*counter));
+
+       local64_set(&event->hw.prev_count, snapshot);
+}
+
+/*
+ * imc_perf_event_update : Fold the counter movement since the last
+ *                         snapshot into event->count.
+ */
+static void imc_perf_event_update(struct perf_event *event)
+{
+       u64 *counter = (u64 *)event->hw.event_base;
+       u64 before, now;
+
+       before = local64_read(&event->hw.prev_count);
+       now = __be64_to_cpu(READ_ONCE(*counter));
+
+       /*
+        * The tool side may read the counter periodically, so the
+        * baseline has to move forward on every update.
+        */
+       local64_set(&event->hw.prev_count, now);
+
+       /* Account the delta to the event count */
+       local64_add(now - before, &event->count);
+}
+
+/*
+ * imc_event_start : pmu "start" callback.
+ *
+ * Records the current counter value as the baseline through
+ * imc_read_counter(). "flags" is not used.
+ */
+static void imc_event_start(struct perf_event *event, int flags)
+{
+       /*
+        * In Memory Counters are free flowing counters. HW or the microcode
+        * keeps adding to the counter offset in memory. To get the event
+        * counter value, we snapshot the value here and calculate the
+        * delta at a later point.
+        */
+       imc_read_counter(event);
+}
+
+/*
+ * imc_event_stop : pmu "stop" callback, also installed as the "del"
+ *                  callback by update_pmu_ops().
+ *
+ * "flags" is not used.
+ */
+static void imc_event_stop(struct perf_event *event, int flags)
+{
+       /*
+        * Take a snapshot and calculate the delta and update
+        * the event counter values.
+        */
+       imc_perf_event_update(event);
+}
+
+/*
+ * imc_event_add : pmu "add" callback.
+ *
+ * Starts the event right away when PERF_EF_START is set in "flags".
+ * Always returns 0.
+ */
+static int imc_event_add(struct perf_event *event, int flags)
+{
+       if (!(flags & PERF_EF_START))
+               return 0;
+
+       imc_event_start(event, flags);
+       return 0;
+}
+
+/*
+ * update_pmu_ops : Fill in the perf callbacks and attribute groups of "pmu".
+ *
+ * Returns -EINVAL when "pmu" is NULL, 0 otherwise.
+ */
+static int update_pmu_ops(struct imc_pmu *pmu)
+{
+       struct pmu *ops;
+
+       if (!pmu)
+               return -EINVAL;
+
+       ops = &pmu->pmu;
+
+       /* Install the format group, then hand the group array to perf */
+       pmu->attr_groups[IMC_FORMAT_ATTR] = &imc_format_group;
+       ops->attr_groups = pmu->attr_groups;
+
+       ops->task_ctx_nr = perf_invalid_context;
+       ops->event_init = nest_imc_event_init;
+       ops->add = imc_event_add;
+       ops->del = imc_event_stop;
+       ops->start = imc_event_start;
+       ops->stop = imc_event_stop;
+       ops->read = imc_perf_event_update;
+
+       return 0;
+}
+
+/*
+ * dev_str_attr : Allocate a read-only event attribute called "name" whose
+ *                event string is "str".
+ *
+ * Neither string is copied; the attribute keeps the pointers it was given.
+ * Returns the embedded struct attribute, or NULL if the allocation fails.
+ */
+static struct attribute *dev_str_attr(const char *name, const char *str)
+{
+       struct perf_pmu_events_attr *ev_attr;
+       struct attribute *raw;
+
+       ev_attr = kzalloc(sizeof(*ev_attr), GFP_KERNEL);
+       if (!ev_attr)
+               return NULL;
+
+       raw = &ev_attr->attr.attr;
+       sysfs_attr_init(raw);
+       raw->name = name;
+       raw->mode = 0444;
+
+       ev_attr->attr.show = perf_event_sysfs_show;
+       ev_attr->event_str = str;
+
+       return raw;
+}
+
+/*
+ * update_events_in_group: Build the "events" attr_group from "events" and
+ *                         assign it to the pmu "pmu".
+ *
+ * @events:    event entries for this pmu, may be NULL.
+ * @idx:       number of entries in "events".
+ * @pmu:       pmu that receives the group in attr_groups[IMC_EVENT_ATTR].
+ *
+ * Returns 0 on success (including when there are no events) and -ENOMEM
+ * if any allocation fails. On failure everything allocated here is freed
+ * again and "pmu" is left untouched.
+ */
+static int update_events_in_group(struct imc_events *events,
+                                 int idx, struct imc_pmu *pmu)
+{
+       struct attribute_group *attr_group;
+       struct attribute **attrs;
+       int i;
+
+       /* If there is no events for this pmu, just return zero */
+       if (!events)
+               return 0;
+
+       /* Allocate memory for attribute group */
+       attr_group = kzalloc(sizeof(*attr_group), GFP_KERNEL);
+       if (!attr_group)
+               return -ENOMEM;
+
+       /* One slot per event plus the terminating NULL entry */
+       attrs = kcalloc(idx + 1, sizeof(*attrs), GFP_KERNEL);
+       if (!attrs)
+               goto err_group;
+
+       for (i = 0; i < idx; i++, events++) {
+               attrs[i] = dev_str_attr((char *)events->ev_name,
+                                       (char *)events->ev_value);
+               /*
+                * A NULL entry would terminate the list early and hide
+                * every attribute after it, so treat it as a failure.
+                */
+               if (!attrs[i])
+                       goto err_attrs;
+       }
+
+       attr_group->name = "events";
+       attr_group->attrs = attrs;
+
+       /* Save the event attribute */
+       pmu->attr_groups[IMC_EVENT_ATTR] = attr_group;
+       return 0;
+
+err_attrs:
+       /* dev_str_attr() hands out the attribute embedded in its allocation */
+       while (--i >= 0)
+               kfree(container_of(attrs[i], struct perf_pmu_events_attr,
+                                  attr.attr));
+       kfree(attrs);
+err_group:
+       kfree(attr_group);
+       return -ENOMEM;
+}
+
+/*
+ * init_imc_pmu : Setup and register the IMC pmu device.
+ *
+ * @events:    events memory for this pmu.
+ * @idx:       number of event entries created.
+ * @pmu_ptr:   memory allocated for this pmu.
+ *
+ * Returns 0 on success, -EINVAL if "pmu_ptr" is NULL, or the error from
+ * building the event group or from perf_pmu_register(). On failure the
+ * dynamically allocated "events" attribute group is released; "events"
+ * and "pmu_ptr" themselves stay owned by the caller.
+ */
+int init_imc_pmu(struct imc_events *events, int idx,
+                struct imc_pmu *pmu_ptr)
+{
+       const struct attribute_group *ev_group;
+       int ret, i;
+
+       /* The error path below dereferences pmu_ptr, so reject NULL first */
+       if (!pmu_ptr)
+               return -EINVAL;
+
+       ret = update_events_in_group(events, idx, pmu_ptr);
+       if (ret)
+               goto err_free;
+
+       ret = update_pmu_ops(pmu_ptr);
+       if (ret)
+               goto err_free;
+
+       ret = perf_pmu_register(&pmu_ptr->pmu, pmu_ptr->pmu.name, -1);
+       if (ret)
+               goto err_free;
+
+       pr_info("%s performance monitor hardware support registered\n",
+               pmu_ptr->pmu.name);
+
+       return 0;
+
+err_free:
+       /* Only free the attr_groups which are dynamically allocated */
+       ev_group = pmu_ptr->attr_groups[IMC_EVENT_ATTR];
+       if (ev_group) {
+               if (ev_group->attrs) {
+                       /*
+                        * update_events_in_group() sized the array for
+                        * "idx" attributes, each one embedded in its own
+                        * perf_pmu_events_attr allocation.
+                        */
+                       for (i = 0; i < idx; i++) {
+                               if (!ev_group->attrs[i])
+                                       continue;
+                               kfree(container_of(ev_group->attrs[i],
+                                                  struct perf_pmu_events_attr,
+                                                  attr.attr));
+                       }
+                       kfree(ev_group->attrs);
+               }
+               kfree(ev_group);
+               /* Do not leave a pointer to freed memory in the pmu */
+               pmu_ptr->attr_groups[IMC_EVENT_ATTR] = NULL;
+       }
+
+       return ret;
+}
diff --git a/arch/powerpc/platforms/powernv/opal-imc.c 
b/arch/powerpc/platforms/powernv/opal-imc.c
index 839c257..a68d66d 100644
--- a/arch/powerpc/platforms/powernv/opal-imc.c
+++ b/arch/powerpc/platforms/powernv/opal-imc.c
@@ -34,8 +34,6 @@
 #include <asm/cputable.h>
 #include <asm/imc-pmu.h>
 
-struct imc_pmu *per_nest_pmu_arr[IMC_MAX_PMUS];
-
 static int imc_event_prop_update(char *name, struct imc_events *events)
 {
        char *buf;
@@ -452,8 +450,17 @@ static int imc_pmu_create(struct device_node *parent, int 
pmu_index, int domain)
                if (prop)
                        imc_events_setup(parent, pmu_index, pmu_ptr, prop, 
&idx);
        }
+       /* Function to register IMC pmu */
+       ret = init_imc_pmu(pmu_ptr->events, idx, pmu_ptr);
+       if (ret) {
+               pr_err("IMC PMU %s Register failed\n", pmu_ptr->pmu.name);
+               goto free_events;
+       }
        return 0;
 
+free_events:
+       if (pmu_ptr->events)
+               imc_free_events(pmu_ptr->events, idx);
 free_pmu:
        if (pmu_ptr)
                kfree(pmu_ptr);
-- 
2.7.4

Reply via email to