On Fri, 18 Nov 2016 08:55:24 -0800
Andi Kleen <[email protected]> wrote:

> From: Andi Kleen <[email protected]>
> 
> ftrace has powerfull trigger functions. Intel PT on modern Intel CPUs
> can trace execution flow.
> 
> For debugging I found it useful to disable the PT trace from ftrace triggers,
> for example when specific kernel functions are hit, which indicate
> a problem. Then we can see the exact execution trace up to this point.
> 
> This patch adds a "ptoff" ftrace trigger/function that disables the trace
> on the current function. The PT trace still has to be set up with perf
> 
> % perf record -e intel_pt// -a ... &
> % cd /sys/kernel/debug/tracing
> % echo do_page_fault:ptoff > set_ftrace_filter
> ...
> % cd -
> % kill %1
> % perf script --itrace=i0ns
> 
> I only implemented local disabling. Enabling would be much more complicated
> and require a black list of functions to avoid recursion. Global
> disabling with IPIs would be possible, but also risk some deadlock
> scenarios. Local disabling is very easy and can be done without
> accessing any special state, so there are no such problems. It is
> usually good enough for debugging purposes. The trace can be always
> reenabled from perf.
> 
> This patch adds "ptoff" both as ftrace trigger and ftrace functions.
> This makes it work from "set_ftrace_filter" and through the trigger
> field of trace points.
> 
> The PT driver exports a pt_disable() function for this that can be also
> used for manual instrumentation.
> 
> Cc: [email protected]
> Cc: [email protected]
> Cc: [email protected]
> Cc: [email protected]
> Signed-off-by: Andi Kleen <[email protected]>
> ---
>  Documentation/trace/ftrace.txt      |  5 +++
>  arch/x86/events/intel/pt.c          | 16 ++++++++
>  include/linux/perf_event.h          |  2 +

Peter Z, Thomas, H. Peter, Ingo,

Are you guys fine with this change. If so I can pull this into my tree.

-- Steve

>  include/linux/trace_events.h        |  1 +
>  kernel/trace/trace.c                |  6 +++
>  kernel/trace/trace_events_trigger.c | 79 
> +++++++++++++++++++++++++++++++++++++
>  kernel/trace/trace_functions.c      | 58 +++++++++++++++++++++++++++
>  7 files changed, 167 insertions(+)
> 
> diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt
> index 185c39fea2a0..5dc8ec658678 100644
> --- a/Documentation/trace/ftrace.txt
> +++ b/Documentation/trace/ftrace.txt
> @@ -2549,6 +2549,11 @@ The following commands are supported:
>    command, it only prints out the contents of the ring buffer for the
>    CPU that executed the function that triggered the dump.
>  
> +- ptoff
> +  When the function is hit disable Intel PT trace. The Intel PT
> +  trace has to be set up earlier with perf record -a -e intel_pt// ...
> +  This disables the trace on the current CPU only.
> +
>  trace_pipe
>  ----------
>  
> diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
> index c5047b8f777b..cf15881da9a5 100644
> --- a/arch/x86/events/intel/pt.c
> +++ b/arch/x86/events/intel/pt.c
> @@ -1455,3 +1455,19 @@ static __init int pt_init(void)
>       return ret;
>  }
>  arch_initcall(pt_init);
> +
> +/*
> + * Disable the PT trace for debugging purposes.
> + */
> +void pt_disable(void)
> +{
> +     u64 val;
> +
> +     if (!boot_cpu_has(X86_FEATURE_INTEL_PT))
> +             return;
> +
> +     rdmsrl_safe(MSR_IA32_RTIT_CTL, &val);
> +     val &= ~RTIT_CTL_TRACEEN;
> +     wrmsrl_safe(MSR_IA32_RTIT_CTL, val);
> +}
> +EXPORT_SYMBOL(pt_disable);
> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> index 4741ecdb9817..a408d288298b 100644
> --- a/include/linux/perf_event.h
> +++ b/include/linux/perf_event.h
> @@ -1387,4 +1387,6 @@ int perf_event_exit_cpu(unsigned int cpu);
>  #define perf_event_exit_cpu  NULL
>  #endif
>  
> +void pt_disable(void);
> +
>  #endif /* _LINUX_PERF_EVENT_H */
> diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
> index be007610ceb0..4d2d4a1b738e 100644
> --- a/include/linux/trace_events.h
> +++ b/include/linux/trace_events.h
> @@ -391,6 +391,7 @@ enum event_trigger_type {
>       ETT_EVENT_ENABLE        = (1 << 3),
>       ETT_EVENT_HIST          = (1 << 4),
>       ETT_HIST_ENABLE         = (1 << 5),
> +     ETT_PTOFF               = (1 << 6),
>  };
>  
>  extern int filter_match_preds(struct event_filter *filter, void *rec);
> diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
> index 8696ce6bf2f6..e55405dce821 100644
> --- a/kernel/trace/trace.c
> +++ b/kernel/trace/trace.c
> @@ -4082,6 +4082,9 @@ static const char readme_msg[] =
>  #endif
>       "\t\t      dump\n"
>       "\t\t      cpudump\n"
> +#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
> +     "\t\t      ptoff\n"
> +#endif
>       "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
>       "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
>       "\t     The first one will disable tracing every time do_fault is hit\n"
> @@ -4175,6 +4178,9 @@ static const char readme_msg[] =
>  #ifdef CONFIG_HIST_TRIGGERS
>       "\t\t    hist (see below)\n"
>  #endif
> +#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
> +     "\t\t   ptoff\t\t- Disable PT trace on current CPU\n"
> +#endif
>       "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
>       "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
>       "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
> diff --git a/kernel/trace/trace_events_trigger.c 
> b/kernel/trace/trace_events_trigger.c
> index a26ff1345784..b4ec8c417c12 100644
> --- a/kernel/trace/trace_events_trigger.c
> +++ b/kernel/trace/trace_events_trigger.c
> @@ -22,6 +22,7 @@
>  #include <linux/ctype.h>
>  #include <linux/mutex.h>
>  #include <linux/slab.h>
> +#include <linux/perf_event.h>
>  
>  #include "trace.h"
>  
> @@ -1044,6 +1045,83 @@ static struct event_command trigger_traceoff_cmd = {
>       .set_filter             = set_trigger_filter,
>  };
>  
> +#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
> +
> +static void
> +ptoff_trigger(struct event_trigger_data *data, void *rec)
> +{
> +     pt_disable();
> +}
> +
> +static void
> +ptoff_count_trigger(struct event_trigger_data *data, void *rec)
> +{
> +     if (!data->count)
> +             return;
> +
> +     if (data->count != -1)
> +             (data->count)--;
> +
> +     ptoff_trigger(data, rec);
> +}
> +
> +static int
> +ptoff_trigger_print(struct seq_file *m, struct event_trigger_ops *ops,
> +                    struct event_trigger_data *data)
> +{
> +     return event_trigger_print("ptoff", m, (void *)data->count,
> +                                data->filter_str);
> +}
> +
> +static struct event_trigger_ops ptoff_trigger_ops = {
> +     .func                   = ptoff_trigger,
> +     .print                  = ptoff_trigger_print,
> +     .init                   = event_trigger_init,
> +     .free                   = event_trigger_free,
> +};
> +
> +static struct event_trigger_ops ptoff_count_trigger_ops = {
> +     .func                   = ptoff_count_trigger,
> +     .print                  = ptoff_trigger_print,
> +     .init                   = event_trigger_init,
> +     .free                   = event_trigger_free,
> +};
> +
> +static struct event_trigger_ops *
> +ptoff_get_trigger_ops(char *cmd, char *param)
> +{
> +     return param ? &ptoff_count_trigger_ops : &ptoff_trigger_ops;
> +}
> +
> +static struct event_command trigger_ptoff_cmd = {
> +     .name                   = "ptoff",
> +     .trigger_type           = ETT_PTOFF,
> +     .func                   = event_trigger_callback,
> +     .reg                    = register_trigger,
> +     .unreg                  = unregister_trigger,
> +     .get_trigger_ops        = ptoff_get_trigger_ops,
> +     .set_filter             = set_trigger_filter,
> +};
> +
> +static __init int register_trigger_ptoff_cmd(void)
> +{
> +     int ret;
> +
> +     if (!boot_cpu_has(X86_FEATURE_INTEL_PT))
> +             return 0;
> +
> +     ret = register_event_command(&trigger_ptoff_cmd);
> +     WARN_ON(ret < 0);
> +
> +     return ret;
> +}
> +
> +#else
> +
> +static inline int register_trigger_ptoff_cmd(void) { return 0; }
> +
> +#endif
> +
>  #ifdef CONFIG_TRACER_SNAPSHOT
>  static void
>  snapshot_trigger(struct event_trigger_data *data, void *rec)
> @@ -1609,6 +1687,7 @@ __init int register_trigger_cmds(void)
>       register_trigger_enable_disable_cmds();
>       register_trigger_hist_enable_disable_cmds();
>       register_trigger_hist_cmd();
> +     register_trigger_ptoff_cmd();
>  
>       return 0;
>  }
> diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
> index 0efa00d80623..80867e3166f7 100644
> --- a/kernel/trace/trace_functions.c
> +++ b/kernel/trace/trace_functions.c
> @@ -15,6 +15,7 @@
>  #include <linux/ftrace.h>
>  #include <linux/slab.h>
>  #include <linux/fs.h>
> +#include <linux/perf_event.h>
>  
>  #include "trace.h"
>  
> @@ -643,6 +644,57 @@ static struct ftrace_func_command ftrace_cpudump_cmd = {
>       .func                   = ftrace_cpudump_callback,
>  };
>  
> +#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
> +static void
> +ftrace_ptoff_probe(unsigned long ip, unsigned long parent_ip, void **data)
> +{
> +     if (update_count(data))
> +             pt_disable();
> +}
> +
> +static int
> +ftrace_ptoff_print(struct seq_file *m, unsigned long ip,
> +                     struct ftrace_probe_ops *ops, void *data)
> +{
> +     return ftrace_probe_print("ptoff", m, ip, data);
> +}
> +
> +static struct ftrace_probe_ops ptoff_probe_ops = {
> +     .func                   = ftrace_ptoff_probe,
> +     .print                  = ftrace_ptoff_print,
> +};
> +
> +static int
> +ftrace_ptoff_callback(struct ftrace_hash *hash,
> +                        char *glob, char *cmd, char *param, int enable)
> +{
> +     struct ftrace_probe_ops *ops;
> +
> +     ops = &ptoff_probe_ops;
> +
> +     /* Only dump once. */
> +     return ftrace_trace_probe_callback(ops, hash, glob, cmd,
> +                                        "1", enable);
> +}
> +
> +static struct ftrace_func_command ftrace_ptoff_cmd = {
> +     .name                   = "ptoff",
> +     .func                   = ftrace_ptoff_callback,
> +};
> +
> +static int register_ptoff_command(void)
> +{
> +     if (!boot_cpu_has(X86_FEATURE_INTEL_PT))
> +             return 0;
> +     return register_ftrace_command(&ftrace_ptoff_cmd);
> +}
> +
> +#else
> +
> +static inline int register_ptoff_command(void) { return 0; }
> +
> +#endif
> +
>  static int __init init_func_cmd_traceon(void)
>  {
>       int ret;
> @@ -667,8 +719,14 @@ static int __init init_func_cmd_traceon(void)
>       if (ret)
>               goto out_free_dump;
>  
> +     ret = register_ptoff_command();
> +     if (ret)
> +         goto out_free_cpudump;
> +
>       return 0;
>  
> + out_free_cpudump:
> +     unregister_ftrace_command(&ftrace_cpudump_cmd);
>   out_free_dump:
>       unregister_ftrace_command(&ftrace_dump_cmd);
>   out_free_stacktrace:

Reply via email to