Ingo Molnar <mi...@kernel.org> writes: > So I really think we need an extended error reporting feature on the perf > kernel > side, so that a 'natural' error (plus a string) is reported back to tooling, > instead of the current -EINVAL. > > No need to do it for everything, doing it for BTS and related functionality > would > be a good first step to start this. > > If you are interested you could try this, or I can try to write something > (after > the merge window). > > So the idea would be to convert such opaque error returns: > > if (attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS && > !attr->freq && hwc->sample_period == 1) { > /* BTS is not supported by this architecture. */ > if (!x86_pmu.bts_active) > return -EOPNOTSUPP; > > into: > > return perf_err(event, -EOPNOTSUPP, "The BTS hardware > feature is not available on this CPU.");
So I poked around this a bit and came up with the patch below to give this topic some more momentum. Your average error will then be like this: #define PERF_MODNAME "perf/x86" ... /* BTS is currently only allowed for user-mode. */ if (!attr->exclude_kernel) return perf_err(event, -EOPNOTSUPP, "BTS sampling not allowed for kernel space"); which in userspace will translate into: ---cut--- $ perf record -e branches:uk -c1 ls kernel says: { "code": -95, "module": "perf/x86", "message": "BTS sampling not allowed for kernel space" } Error: No hardware sampling interrupt available. No APIC? If so then you can boot the kernel with the "lapic" boot parameter to force-enable it. ---cut--- The way I hacked it into tools/perf, it still prints out the old error message (which, to prove the point once again, is neither here nor there). The patch below attaches the error to struct perf_event or copies it directly to user's buffer in case if we don't have an event at the point of error. Maybe a slightly easier way of doing it would be to strap it to task_struct instead, but let's not stir the pot too much this time around. Also, to add an extra spin, the report is formatted as a JSON object so that we could include both human-readable and machine-readable bits. >From a7de169ef2ca5425cd57286bfb61bfd5f8d15738 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin <alexander.shish...@linux.intel.com> Date: Fri, 3 Jul 2015 17:12:54 +0300 Subject: [PATCH] perf: Introduce extended syscall error reporting It has been pointed several times out that perf syscall error reporting leaves a lot to be desired [1]. This patch introduces a fairly simple extension that allows call sites to annotate their error codes with arbitrary strings, which will then be copied to userspace (if they asked for it) along with the module name that produced the error message in JSON format. This way, we can provide both human-readable and machine-parsable information to user and leave room for extensions in the future. [1] http://marc.info/?l=linux-kernel&m=141470811013082 Signed-off-by: Alexander Shishkin <alexander.shish...@linux.intel.com> --- include/linux/perf_event.h | 38 ++++++++++++++++++++++++ include/uapi/linux/perf_event.h | 8 ++++- kernel/events/core.c | 66 +++++++++++++++++++++++++++++++++++++++-- 3 files changed, 108 insertions(+), 4 deletions(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 1b82d44b0a..15bbef478f 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -56,6 +56,37 @@ struct perf_guest_info_callbacks { #include <linux/cgroup.h> #include <asm/local.h> +#ifndef PERF_MODNAME +#define PERF_MODNAME KBUILD_MODNAME +#endif + +/* + * Extended error reporting: annotate an error code with a string + * and a module name to help users diagnase problems with their + * attributes and whatnot. + */ +struct perf_err_site { + const char *message; + const char *owner; + const int code; +}; + +#ifdef CONFIG_PERF_EVENTS + +#define __perf_err(__e, __c, __m) ({ \ + static struct perf_err_site __err_site = { \ + .message = (__m), \ + .owner = PERF_MODNAME, \ + .code = (__c), \ + }; \ + (__e) = &__err_site; \ + (__c); \ +}) + +#define perf_err(__evt, __c, __m) ({ __perf_err((__evt)->error, (__c), (__m)); }) + +#endif + struct perf_callchain_entry { __u64 nr; __u64 ip[PERF_MAX_STACK_DEPTH]; @@ -447,6 +478,13 @@ struct perf_event { struct list_head owner_entry; struct task_struct *owner; + /* + * Extended error reporting + */ + const struct perf_err_site *error; + void __user *error_buffer; + size_t error_buffer_size; + /* mmap bits */ struct mutex mmap_mutex; atomic_t mmap_count; diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index d97f84c080..d1ae1a079c 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -264,6 +264,7 @@ enum perf_event_read_format { /* add: sample_stack_user */ #define PERF_ATTR_SIZE_VER4 104 /* add: sample_regs_intr */ #define PERF_ATTR_SIZE_VER5 112 /* add: aux_watermark */ +#define PERF_ATTR_SIZE_VER6 120 /* add: perf_err */ /* * Hardware event_id to monitor via a performance monitoring event: @@ -374,7 +375,12 @@ struct perf_event_attr { * Wakeup watermark for AUX area */ __u32 aux_watermark; - __u32 __reserved_2; /* align to __u64 */ + + /* + * Extended error reporting buffer + */ + __u32 perf_err_size; + __u64 perf_err; }; #define perf_flags(attr) (*(&(attr)->read_format + 1)) diff --git a/kernel/events/core.c b/kernel/events/core.c index 8e13f3e54e..fd345c96d1 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -49,6 +49,60 @@ #include <asm/irq_regs.h> +static bool extended_reporting_enabled(struct perf_event_attr *attr) +{ + if (attr->size >= PERF_ATTR_SIZE_VER6 && + attr->perf_err_size > 0) + return true; + + return false; +} + +/* + * Provide a JSON formatted error report to the user if they asked for it. + */ +static void __perf_error_report(struct perf_event_attr *attr, + const struct perf_err_site *err_site) +{ + void *buffer; + + if (!err_site || !extended_reporting_enabled(attr)) + return; + + buffer = kasprintf(GFP_KERNEL, + "{\n" + "\t\"code\": %d,\n" + "\t\"module\": \"%s\",\n" + "\t\"message\": \"%s\"\n" + "}\n", + err_site->code, err_site->owner, err_site->message); + if (!buffer) + return; + + (void)copy_to_user((void __user *)attr->perf_err, buffer, + attr->perf_err_size); + kfree(buffer); +} + +/* + * Synchronous version of perf_err(), for the paths where we don't have + * an event. + */ +#define perf_err_attr(__attr, __c, __m) ({ \ + struct perf_err_site *__site; \ + __perf_err(__site, (__c), (__m)); \ + __perf_error_report(__attr, __site); \ + (__c); \ +}) + +/* + * Report an error before returning from a syscall + */ +static void perf_error_report(struct perf_event *event) +{ + __perf_error_report(&event->attr, event->error); +} + static struct workqueue_struct *perf_wq; typedef int (*remote_function_f)(void *); @@ -3594,6 +3648,8 @@ static void _free_event(struct perf_event *event) */ static void free_event(struct perf_event *event) { + perf_error_report(event); + if (WARN(atomic_long_cmpxchg(&event->refcount, 1, 0) != 1, "unexpected event refcount: %ld; ptr=%p\n", atomic_long_read(&event->refcount), event)) { @@ -3661,6 +3717,8 @@ static void put_event(struct perf_event *event) if (!atomic_long_dec_and_test(&event->refcount)) return; + perf_error_report(event); + if (!is_kernel_event(event)) perf_remove_from_owner(event); @@ -7634,6 +7692,7 @@ err_ns: perf_detach_cgroup(event); if (event->ns) put_pid_ns(event->ns); + perf_error_report(event); kfree(event); return ERR_PTR(err); @@ -7912,15 +7971,16 @@ SYSCALL_DEFINE5(perf_event_open, if (!attr.exclude_kernel) { if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN)) - return -EACCES; + return perf_err_attr(&attr, -EACCES, + "kernel tracing forbidden for the unprivileged"); } if (attr.freq) { if (attr.sample_freq > sysctl_perf_event_sample_rate) - return -EINVAL; + return perf_err_attr(&attr, -EINVAL, "sample_freq too high"); } else { if (attr.sample_period & (1ULL << 63)) - return -EINVAL; + return perf_err_attr(&attr, -EINVAL, "sample_period too high"); } /* -- 2.1.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/