Re: [GIT PULL 0/8] perf/pt -> Intel PT/BTS

Alexander Shishkin Fri, 03 Jul 2015 07:36:06 -0700

Ingo Molnar <mi...@kernel.org> writes:

> So I really think we need an extended error reporting feature on the perf 
> kernel 
> side, so that a 'natural' error (plus a string) is reported back to tooling, 
> instead of the current -EINVAL.
>
> No need to do it for everything, doing it for BTS and related functionality 
> would 
> be a good first step to start this.
>
> If you are interested you could try this, or I can try to write something 
> (after 
> the merge window).
>
> So the idea would be to convert such opaque error returns:
>
>         if (attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS &&
>             !attr->freq && hwc->sample_period == 1) {
>                 /* BTS is not supported by this architecture. */
>                 if (!x86_pmu.bts_active)
>                         return -EOPNOTSUPP;
>
> into:
>
>                       return perf_err(event, -EOPNOTSUPP, "The BTS hardware 
> feature is not available on this CPU.");


So I poked around this a bit and came up with the patch below to give
this topic some more momentum.

Your average error will then be like this:

#define PERF_MODNAME "perf/x86"

...

               /* BTS is currently only allowed for user-mode. */
               if (!attr->exclude_kernel)
                       return perf_err(event, -EOPNOTSUPP,
                                       "BTS sampling not allowed for kernel 
space");

which in userspace will translate into:

---cut---
$ perf record -e branches:uk -c1 ls
kernel says: {
        "code": -95,
        "module": "perf/x86",
        "message": "BTS sampling not allowed for kernel space"
}

Error:
No hardware sampling interrupt available.
No APIC? If so then you can boot the kernel with the "lapic" boot parameter to 
force-enable it.
---cut---

The way I hacked it into tools/perf, it still prints out the old error
message (which, to prove the point once again, is neither here nor
there).

The patch below attaches the error to struct perf_event or copies it
directly to user's buffer in case if we don't have an event at the point
of error. Maybe a slightly easier way of doing it would be to strap it
to task_struct instead, but let's not stir the pot too much this time
around.

Also, to add an extra spin, the report is formatted as a JSON object so
that we could include both human-readable and machine-readable bits.

>From a7de169ef2ca5425cd57286bfb61bfd5f8d15738 Mon Sep 17 00:00:00 2001
From: Alexander Shishkin <alexander.shish...@linux.intel.com>
Date: Fri, 3 Jul 2015 17:12:54 +0300
Subject: [PATCH] perf: Introduce extended syscall error reporting

It has been pointed several times out that perf syscall error reporting
leaves a lot to be desired [1].

This patch introduces a fairly simple extension that allows call sites
to annotate their error codes with arbitrary strings, which will then
be copied to userspace (if they asked for it) along with the module
name that produced the error message in JSON format. This way, we can
provide both human-readable and machine-parsable information to user and
leave room for extensions in the future.

[1] http://marc.info/?l=linux-kernel&m=141470811013082

Signed-off-by: Alexander Shishkin <alexander.shish...@linux.intel.com>
---
 include/linux/perf_event.h      | 38 ++++++++++++++++++++++++
 include/uapi/linux/perf_event.h |  8 ++++-
 kernel/events/core.c            | 66 +++++++++++++++++++++++++++++++++++++++--
 3 files changed, 108 insertions(+), 4 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 1b82d44b0a..15bbef478f 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -56,6 +56,37 @@ struct perf_guest_info_callbacks {
 #include <linux/cgroup.h>
 #include <asm/local.h>
 
+#ifndef PERF_MODNAME
+#define PERF_MODNAME KBUILD_MODNAME
+#endif
+
+/*
+ * Extended error reporting: annotate an error code with a string
+ * and a module name to help users diagnase problems with their
+ * attributes and whatnot.
+ */
+struct perf_err_site {
+       const char              *message;
+       const char              *owner;
+       const int               code;
+};
+
+#ifdef CONFIG_PERF_EVENTS
+
+#define __perf_err(__e, __c, __m) ({                   \
+       static struct perf_err_site __err_site = {      \
+               .message        = (__m),                \
+               .owner          = PERF_MODNAME,         \
+               .code           = (__c),                \
+       };                                              \
+       (__e) = &__err_site;                            \
+       (__c);                                          \
+})
+
+#define perf_err(__evt, __c, __m) ({ __perf_err((__evt)->error, (__c), (__m)); 
})
+
+#endif
+
 struct perf_callchain_entry {
        __u64                           nr;
        __u64                           ip[PERF_MAX_STACK_DEPTH];
@@ -447,6 +478,13 @@ struct perf_event {
        struct list_head                owner_entry;
        struct task_struct              *owner;
 
+       /*
+        * Extended error reporting
+        */
+       const struct perf_err_site      *error;
+       void __user                     *error_buffer;
+       size_t                          error_buffer_size;
+
        /* mmap bits */
        struct mutex                    mmap_mutex;
        atomic_t                        mmap_count;
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index d97f84c080..d1ae1a079c 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -264,6 +264,7 @@ enum perf_event_read_format {
                                        /* add: sample_stack_user */
 #define PERF_ATTR_SIZE_VER4    104     /* add: sample_regs_intr */
 #define PERF_ATTR_SIZE_VER5    112     /* add: aux_watermark */
+#define PERF_ATTR_SIZE_VER6    120     /* add: perf_err */
 
 /*
  * Hardware event_id to monitor via a performance monitoring event:
@@ -374,7 +375,12 @@ struct perf_event_attr {
         * Wakeup watermark for AUX area
         */
        __u32   aux_watermark;
-       __u32   __reserved_2;   /* align to __u64 */
+
+       /*
+        * Extended error reporting buffer
+        */
+       __u32   perf_err_size;
+       __u64   perf_err;
 };
 
 #define perf_flags(attr)       (*(&(attr)->read_format + 1))
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 8e13f3e54e..fd345c96d1 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -49,6 +49,60 @@
 
 #include <asm/irq_regs.h>
 
+static bool extended_reporting_enabled(struct perf_event_attr *attr)
+{
+       if (attr->size >= PERF_ATTR_SIZE_VER6 &&
+           attr->perf_err_size > 0)
+               return true;
+
+       return false;
+}
+
+/*
+ * Provide a JSON formatted error report to the user if they asked for it.
+ */
+static void __perf_error_report(struct perf_event_attr *attr,
+                               const struct perf_err_site *err_site)
+{
+       void *buffer;
+
+       if (!err_site || !extended_reporting_enabled(attr))
+               return;
+
+       buffer = kasprintf(GFP_KERNEL,
+                          "{\n"
+                          "\t\"code\": %d,\n"
+                          "\t\"module\": \"%s\",\n"
+                          "\t\"message\": \"%s\"\n"
+                          "}\n",
+                          err_site->code, err_site->owner, err_site->message);
+       if (!buffer)
+               return;
+
+       (void)copy_to_user((void __user *)attr->perf_err, buffer,
+                          attr->perf_err_size);
+       kfree(buffer);
+}
+
+/*
+ * Synchronous version of perf_err(), for the paths where we don't have
+ * an event.
+ */
+#define perf_err_attr(__attr, __c, __m) ({             \
+       struct perf_err_site *__site;                   \
+       __perf_err(__site, (__c), (__m));               \
+       __perf_error_report(__attr, __site);            \
+       (__c);                                          \
+})
+
+/*
+ * Report an error before returning from a syscall
+ */
+static void perf_error_report(struct perf_event *event)
+{
+       __perf_error_report(&event->attr, event->error);
+}
+
 static struct workqueue_struct *perf_wq;
 
 typedef int (*remote_function_f)(void *);
@@ -3594,6 +3648,8 @@ static void _free_event(struct perf_event *event)
  */
 static void free_event(struct perf_event *event)
 {
+       perf_error_report(event);
+
        if (WARN(atomic_long_cmpxchg(&event->refcount, 1, 0) != 1,
                                "unexpected event refcount: %ld; ptr=%p\n",
                                atomic_long_read(&event->refcount), event)) {
@@ -3661,6 +3717,8 @@ static void put_event(struct perf_event *event)
        if (!atomic_long_dec_and_test(&event->refcount))
                return;
 
+       perf_error_report(event);
+
        if (!is_kernel_event(event))
                perf_remove_from_owner(event);
 
@@ -7634,6 +7692,7 @@ err_ns:
                perf_detach_cgroup(event);
        if (event->ns)
                put_pid_ns(event->ns);
+       perf_error_report(event);
        kfree(event);
 
        return ERR_PTR(err);
@@ -7912,15 +7971,16 @@ SYSCALL_DEFINE5(perf_event_open,
 
        if (!attr.exclude_kernel) {
                if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
-                       return -EACCES;
+                       return perf_err_attr(&attr, -EACCES,
+                                            "kernel tracing forbidden for the 
unprivileged");
        }
 
        if (attr.freq) {
                if (attr.sample_freq > sysctl_perf_event_sample_rate)
-                       return -EINVAL;
+                       return perf_err_attr(&attr, -EINVAL, "sample_freq too 
high");
        } else {
                if (attr.sample_period & (1ULL << 63))
-                       return -EINVAL;
+                       return perf_err_attr(&attr, -EINVAL, "sample_period too 
high");
        }
 
        /*
-- 
2.1.4



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [GIT PULL 0/8] perf/pt -> Intel PT/BTS

Reply via email to