This patch adds support for performance monitoring of PAPR nvdimm devices via the perf interface. It adds callback functions such as add/del/read/event_init for the nvdimm_pmu structure.
The patch adds a new member 'priv' to the pdev_archdata structure to save the nvdimm_pmu device pointer, so that the pmu device can be unregistered later. The papr_scm_pmu_register function populates the nvdimm_pmu structure with events and attribute groups, along with the event handling functions. The event handling functions internally use hcalls to get the event and counter data. Finally, the populated nvdimm_pmu structure is passed on to register the pmu device.

Result on a power9 machine with 2 nvdimm devices:

Ex: List all events with the perf list command:

# perf list nmem
  nmem0/cchrhcnt/ [Kernel PMU event]
  nmem0/cchwhcnt/ [Kernel PMU event]
  nmem0/critrscu/ [Kernel PMU event]
  nmem0/ctlresct/ [Kernel PMU event]
  nmem0/ctlrestm/ [Kernel PMU event]
  nmem0/fastwcnt/ [Kernel PMU event]
  nmem0/hostlcnt/ [Kernel PMU event]
  nmem0/hostldur/ [Kernel PMU event]
  nmem0/hostscnt/ [Kernel PMU event]
  nmem0/hostsdur/ [Kernel PMU event]
  nmem0/medrcnt/ [Kernel PMU event]
  nmem0/medrdur/ [Kernel PMU event]
  nmem0/medwcnt/ [Kernel PMU event]
  nmem0/medwdur/ [Kernel PMU event]
  nmem0/memlife/ [Kernel PMU event]
  nmem0/noopstat/ [Kernel PMU event]
  nmem0/ponsecs/ [Kernel PMU event]
  nmem1/cchrhcnt/ [Kernel PMU event]
  nmem1/cchwhcnt/ [Kernel PMU event]
  nmem1/critrscu/ [Kernel PMU event]
  ...
nmem1/noopstat/ [Kernel PMU event] nmem1/ponsecs/ [Kernel PMU event] Signed-off-by: Kajol Jain <kj...@linux.ibm.com> --- arch/powerpc/include/asm/device.h | 5 + arch/powerpc/platforms/pseries/papr_scm.c | 284 +++++++++++++++++++++- 2 files changed, 288 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/device.h b/arch/powerpc/include/asm/device.h index 219559d65864..47ed639f3b8f 100644 --- a/arch/powerpc/include/asm/device.h +++ b/arch/powerpc/include/asm/device.h @@ -48,6 +48,11 @@ struct dev_archdata { struct pdev_archdata { u64 dma_mask; + /* + * Pointer to nvdimm_pmu structure, to handle the unregistering + * of pmu device + */ + void *priv; }; #endif /* _ASM_POWERPC_DEVICE_H */ diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index ef26fe40efb0..997d379094d0 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -18,6 +18,8 @@ #include <asm/plpar_wrappers.h> #include <asm/papr_pdsm.h> #include <asm/mce.h> +#include <linux/perf_event.h> +#include <linux/ctype.h> #define BIND_ANY_ADDR (~0ul) @@ -116,6 +118,9 @@ struct papr_scm_priv { /* length of the stat buffer as expected by phyp */ size_t stat_buffer_len; + + /* array to have event_code and stat_id mappings */ + char **nvdimm_events_map; }; static int papr_scm_pmem_flush(struct nd_region *nd_region, @@ -329,6 +334,271 @@ static ssize_t drc_pmem_query_stats(struct papr_scm_priv *p, return 0; } +static struct attribute_group nvdimm_pmu_events_group = { + .name = "events", + /* .attrs is set in papr_scm_pmu_check_events function */ +}; + +PMU_FORMAT_ATTR(event, "config:0-37"); + +static struct attribute *nvdimm_pmu_format_attr[] = { + &format_attr_event.attr, + NULL, +}; + +static struct attribute_group nvdimm_pmu_format_group = { + .name = "format", + .attrs = nvdimm_pmu_format_attr, +}; + +static const struct attribute_group *nvdimm_pmu_attr_groups[] = { + &nvdimm_pmu_format_group, + 
&nvdimm_pmu_events_group, + NULL, +}; + +static void papr_scm_pmu_get_value(struct perf_event *event, struct device *dev, u64 *count) +{ + struct papr_scm_perf_stat *stat; + struct papr_scm_perf_stats *stats; + struct papr_scm_priv *p = (struct papr_scm_priv *)dev->driver_data; + int rc, size; + u64 statval; + + /* Allocate buffer to hold single performance stat */ + size = sizeof(struct papr_scm_perf_stats) + + sizeof(struct papr_scm_perf_stat); + + if (!p->nvdimm_events_map) + return; + + stats = kzalloc(size, GFP_KERNEL); + if (!stats) + return; + + stat = &stats->scm_statistic[0]; + memcpy(&stat->stat_id, + p->nvdimm_events_map[event->attr.config - 1], + sizeof(stat->stat_id)); + stat->stat_val = 0; + + rc = drc_pmem_query_stats(p, stats, 1); + if (rc < 0) { + kfree(stats); + return; + } + + statval = be64_to_cpu(stat->stat_val); + *count = statval; + kfree(stats); +} + +static int papr_scm_pmu_add(struct perf_event *event, int flags, struct device *dev) +{ + u64 count = 0; + + papr_scm_pmu_get_value(event, dev, &count); + local64_set(&event->hw.prev_count, count); + return 0; +} + +static void papr_scm_pmu_read(struct perf_event *event, struct device *dev) +{ + u64 prev, now = 0; + + papr_scm_pmu_get_value(event, dev, &now); + prev = local64_xchg(&event->hw.prev_count, now); + + if (now - prev >= 0) + local64_add(now - prev, &event->count); +} + +static void papr_scm_pmu_del(struct perf_event *event, int flags, struct device *dev) +{ + papr_scm_pmu_read(event, dev); +} + +static void nvdimm_pmu_uinit(struct nvdimm_pmu *nd_pmu) +{ + unregister_nvdimm_pmu(&nd_pmu->pmu); + kfree(nd_pmu); +} + +static int papr_scm_pmu_register(struct papr_scm_priv *p) +{ + struct nvdimm_pmu *papr_scm_pmu; + int rc; + + papr_scm_pmu = devm_kzalloc(&p->pdev->dev, sizeof(*papr_scm_pmu), GFP_KERNEL); + if (!papr_scm_pmu) + return -ENOMEM; + + papr_scm_pmu->name = nvdimm_name(p->nvdimm); + papr_scm_pmu->read = papr_scm_pmu_read; + papr_scm_pmu->add = papr_scm_pmu_add; + 
papr_scm_pmu->del = papr_scm_pmu_del; + papr_scm_pmu->attr_groups = nvdimm_pmu_attr_groups; + + rc = register_nvdimm_pmu(papr_scm_pmu, p->pdev); + if (rc) + goto pmu_register_err; + + /* + * Set archdata.priv value to nvdimm_pmu structure, to handle the + * unregistering of pmu device. + */ + p->pdev->archdata.priv = papr_scm_pmu; + return 0; + +pmu_register_err: + kfree(papr_scm_pmu); + return rc; +} + +static ssize_t device_show_string(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct perf_pmu_events_attr *d; + + d = container_of(attr, struct perf_pmu_events_attr, attr); + + return sysfs_emit(buf, "%s\n", (char *)d->event_str); +} + +static char *strtolower(char *updated_name) +{ + int i = 0; + + while (updated_name[i]) { + if (isupper(updated_name[i])) + updated_name[i] = tolower(updated_name[i]); + i++; + } + updated_name[i] = '\0'; + return strim(updated_name); +} + +/* device_str_attr_create : Populate event "name" and string "str" in attribute */ +static struct attribute *device_str_attr_create_(char *name, char *str) +{ + struct perf_pmu_events_attr *attr; + + attr = kzalloc(sizeof(*attr), GFP_KERNEL); + + if (!attr) + return NULL; + + sysfs_attr_init(&attr->attr.attr); + attr->event_str = str; + attr->attr.attr.name = strtolower(name); + attr->attr.attr.mode = 0444; + attr->attr.show = device_show_string; + + return &attr->attr.attr; +} + +static int papr_scm_pmu_check_events(struct papr_scm_priv *p) +{ + struct papr_scm_perf_stat *stat; + struct papr_scm_perf_stats *stats, *single_stats; + int index, size, rc, attrs; + u32 total_events; + struct attribute **events; + char *eventcode, *eventname, *statid; + + if (!p->stat_buffer_len) + return -ENOENT; + + total_events = (p->stat_buffer_len - sizeof(struct papr_scm_perf_stats)) + / sizeof(struct papr_scm_perf_stat); + + /* Allocate the buffer for phyp where stats are written */ + stats = kzalloc(p->stat_buffer_len, GFP_KERNEL); + if (!stats) + return -ENOMEM; + + /* Allocate memory 
to nvdimm_event_map */ + p->nvdimm_events_map = kcalloc(total_events, sizeof(char *), GFP_KERNEL); + if (!p->nvdimm_events_map) { + rc = -ENOMEM; + goto out_stats; + } + + /* Called to get list of events supported */ + rc = drc_pmem_query_stats(p, stats, 0); + if (rc) + goto out_nvdimm_events_map; + + /* Allocate buffer to hold single performance stat */ + size = sizeof(struct papr_scm_perf_stats) + sizeof(struct papr_scm_perf_stat); + + single_stats = kzalloc(size, GFP_KERNEL); + if (!single_stats) { + rc = -ENOMEM; + goto out_nvdimm_events_map; + } + + events = kzalloc(total_events * sizeof(struct attribute *), GFP_KERNEL); + if (!events) { + rc = -ENOMEM; + goto out_single_stats; + } + + for (index = 0, stat = stats->scm_statistic, attrs = 0; + index < total_events; index++, ++stat) { + + single_stats->scm_statistic[0] = *stat; + rc = drc_pmem_query_stats(p, single_stats, 1); + + if (rc < 0) { + pr_info("Event not supported %s for device %s\n", + stat->stat_id, nvdimm_name(p->nvdimm)); + } else { + eventcode = kasprintf(GFP_KERNEL, "event=0x%x", attrs + 1); + eventname = kzalloc(strlen(stat->stat_id) + 1, GFP_KERNEL); + statid = kzalloc(strlen(stat->stat_id) + 1, GFP_KERNEL); + + if (!eventname || !statid || !eventcode) + goto out; + + strcpy(eventname, stat->stat_id); + events[attrs] = device_str_attr_create_(eventname, + eventcode); + if (!events[attrs]) + goto out; + + strcpy(statid, stat->stat_id); + p->nvdimm_events_map[attrs] = statid; + attrs++; + continue; +out: + kfree(eventcode); + kfree(eventname); + kfree(statid); + } + } + events[attrs] = NULL; + p->nvdimm_events_map[attrs] = NULL; + + if (!attrs) + goto out_events; + + nvdimm_pmu_events_group.attrs = events; + kfree(single_stats); + kfree(stats); + return 0; + +out_events: + kfree(events); +out_single_stats: + kfree(single_stats); +out_nvdimm_events_map: + kfree(p->nvdimm_events_map); +out_stats: + kfree(stats); + return rc; +} + /* * Issue hcall to retrieve dimm health info and populate 
papr_scm_priv with the * health information. @@ -923,7 +1193,7 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p) struct nd_mapping_desc mapping; struct nd_region_desc ndr_desc; unsigned long dimm_flags; - int target_nid, online_nid; + int target_nid, online_nid, rc; ssize_t stat_size; p->bus_desc.ndctl = papr_scm_ndctl; @@ -1015,6 +1285,15 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p) p->stat_buffer_len = stat_size; dev_dbg(&p->pdev->dev, "Max perf-stat size %lu-bytes\n", p->stat_buffer_len); + + rc = papr_scm_pmu_check_events(p); + if (rc) { + dev_info(&p->pdev->dev, "nvdimm pmu check events failed, rc=%d\n", rc); + } else { + rc = papr_scm_pmu_register(p); + if (rc) + dev_info(&p->pdev->dev, "nvdimm pmu didn't register rc=%d\n", rc); + } } else { dev_info(&p->pdev->dev, "Dimm performance stats unavailable\n"); } @@ -1195,7 +1474,10 @@ static int papr_scm_remove(struct platform_device *pdev) nvdimm_bus_unregister(p->bus); drc_pmem_unbind(p); + nvdimm_pmu_uinit(pdev->archdata.priv); + pdev->archdata.priv = NULL; kfree(p->bus_desc.provider_name); + kfree(p->nvdimm_events_map); kfree(p); return 0; -- 2.27.0 _______________________________________________ Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org To unsubscribe send an email to linux-nvdimm-le...@lists.01.org