Synthetic events are user-defined events generated from hist trigger
variables saved from one or more other events.

To define a synthetic event, the user writes a simple specification
consisting of the name of the new event along with one or more
variables and their type(s), to the tracing/synthetic_events file.

For instance, the following creates a new event named 'wakeup_latency'
with 3 fields: lat, pid, and prio:

    # echo 'wakeup_latency u64 lat; pid_t pid; int prio' >> \
      /sys/kernel/debug/tracing/synthetic_events

Reading the tracing/synthetic_events file lists all the
currently-defined synthetic events, in this case the event we defined
above:

    # cat /sys/kernel/debug/tracing/synthetic_events
    wakeup_latency u64 lat; pid_t pid; int prio

At this point, the synthetic event is ready to use, and a histogram
can be defined using it:

    # echo 'hist:keys=pid,prio,lat.log2:sort=pid,lat' >> \
    /sys/kernel/debug/tracing/events/synthetic/wakeup_latency/trigger

The new event is created under the tracing/events/synthetic/ directory
and looks and behaves just like any other event:

    # ls /sys/kernel/debug/tracing/events/synthetic/wakeup_latency
      enable  filter  format  hist  id  trigger

Although a histogram can be defined for it, nothing will happen until
an action tracing that event via the trace_synth() function occurs.
The trace_synth() function is very similar to all the other trace_*
invocations spread throughout the kernel, except in this case the
trace_ function and its corresponding tracepoint isn't statically
generated but defined by the user at run-time.

How this can be automatically hooked up via a hist trigger 'action' is
discussed in a subsequent patch.

Signed-off-by: Tom Zanussi <tom.zanu...@linux.intel.com>
---
 kernel/trace/trace_events_hist.c | 738 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 738 insertions(+)

diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 338a9d5..e11b3a3 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -20,10 +20,14 @@
 #include <linux/slab.h>
 #include <linux/stacktrace.h>
 #include <linux/rculist.h>
+#include <linux/tracefs.h>
 
 #include "tracing_map.h"
 #include "trace.h"
 
+#define SYNTH_SYSTEM           "synthetic"
+#define SYNTH_FIELDS_MAX       16
+
 struct hist_field;
 
 typedef u64 (*hist_field_fn_t) (struct hist_field *field,
@@ -261,6 +265,23 @@ struct hist_trigger_data {
        unsigned int                    n_actions;
 };
 
+struct synth_field {
+       char *type;
+       char *name;
+       unsigned int size;
+       bool is_signed;
+};
+
+struct synth_event {
+       struct list_head                        list;
+       char                                    *name;
+       struct synth_field                      **fields;
+       unsigned int                            n_fields;
+       struct trace_event_class                class;
+       struct trace_event_call                 call;
+       struct tracepoint                       *tp;
+};
+
 struct action_data;
 
 typedef void (*action_fn_t) (struct hist_trigger_data *hist_data,
@@ -273,6 +294,688 @@ struct action_data {
        unsigned int            var_ref_idx;
 };
 
+static LIST_HEAD(synth_event_list);
+static DEFINE_MUTEX(synth_event_mutex);
+
+struct synth_trace_event {
+       struct trace_entry      ent;
+       int                     n_fields;
+       u64                     fields[];
+};
+
+static int synth_event_define_fields(struct trace_event_call *call)
+{
+       struct synth_trace_event trace;
+       int offset = offsetof(typeof(trace), fields);
+       struct synth_event *event = call->data;
+       unsigned int i, size;
+       char *name, *type;
+       bool is_signed;
+       int ret = 0;
+
+       for (i = 0; i < event->n_fields; i++) {
+               size = event->fields[i]->size;
+               is_signed = event->fields[i]->is_signed;
+               type = event->fields[i]->type;
+               name = event->fields[i]->name;
+               ret = trace_define_field(call, type, name, offset, size,
+                                        is_signed, FILTER_OTHER);
+               offset += sizeof(u64);
+       }
+
+       return ret;
+}
+
+static enum print_line_t print_synth_event(struct trace_iterator *iter,
+                                          int flags,
+                                          struct trace_event *event)
+{
+       struct trace_array *tr = iter->tr;
+       struct trace_seq *s = &iter->seq;
+       struct synth_trace_event *entry;
+       struct synth_event *se;
+       unsigned int i;
+
+       entry = (struct synth_trace_event *)iter->ent;
+       se = container_of(event, struct synth_event, call.event);
+
+       trace_seq_printf(s, "%s: ", se->name);
+
+       for (i = 0; i < entry->n_fields; i++) {
+               if (trace_seq_has_overflowed(s))
+                       goto end;
+
+               /* parameter types */
+               if (tr->trace_flags & TRACE_ITER_VERBOSE)
+                       trace_seq_printf(s, "%s ", "u64");
+
+               /* parameter values */
+               trace_seq_printf(s, "%s=%llu%s", se->fields[i]->name,
+                                entry->fields[i],
+                                i == entry->n_fields - 1 ? "" : ", ");
+       }
+end:
+       trace_seq_putc(s, '\n');
+
+       return trace_handle_return(s);
+}
+
+static struct trace_event_functions synth_event_funcs = {
+       .trace          = print_synth_event
+};
+
+static notrace void trace_event_raw_event_synth(void *__data,
+                                               u64 *var_ref_vals,
+                                               unsigned int var_ref_idx)
+{
+       struct trace_event_file *trace_file = __data;
+       struct synth_trace_event *entry;
+       struct trace_event_buffer fbuffer;
+       int fields_size;
+       unsigned int i;
+
+       struct synth_event *event;
+
+       event = trace_file->event_call->data;
+
+       if (trace_trigger_soft_disabled(trace_file))
+               return;
+
+       fields_size = event->n_fields * sizeof(u64);
+
+       entry = trace_event_buffer_reserve(&fbuffer, trace_file,
+                                          sizeof(*entry) + fields_size);
+       if (!entry)
+               return;
+
+       entry->n_fields = event->n_fields;
+
+       for (i = 0; i < event->n_fields; i++)
+               entry->fields[i] = var_ref_vals[var_ref_idx + i];
+
+       trace_event_buffer_commit(&fbuffer);
+}
+
+static void free_synth_event_print_fmt(struct trace_event_call *call)
+{
+       if (call)
+               kfree(call->print_fmt);
+}
+
+static int __set_synth_event_print_fmt(struct synth_event *event,
+                                      char *buf, int len)
+{
+       int pos = 0;
+       int i;
+
+       /* When len=0, we just calculate the needed length */
+#define LEN_OR_ZERO (len ? len - pos : 0)
+
+       pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
+       for (i = 0; i < event->n_fields; i++) {
+               pos += snprintf(buf + pos, LEN_OR_ZERO, "%s: 0x%%0%zulx%s",
+                               event->fields[i]->name, sizeof(u64),
+                               i == event->n_fields - 1 ? "" : ", ");
+       }
+       pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
+
+       for (i = 0; i < event->n_fields; i++) {
+               pos += snprintf(buf + pos, LEN_OR_ZERO,
+                               ", ((u64)(REC->%s))", event->fields[i]->name);
+       }
+
+#undef LEN_OR_ZERO
+
+       /* return the length of print_fmt */
+       return pos;
+}
+
+static int set_synth_event_print_fmt(struct trace_event_call *call)
+{
+       struct synth_event *event = call->data;
+       char *print_fmt;
+       int len;
+
+       /* First: called with 0 length to calculate the needed length */
+       len = __set_synth_event_print_fmt(event, NULL, 0);
+
+       print_fmt = kmalloc(len + 1, GFP_KERNEL);
+       if (!print_fmt)
+               return -ENOMEM;
+
+       /* Second: actually write the @print_fmt */
+       __set_synth_event_print_fmt(event, print_fmt, len + 1);
+       call->print_fmt = print_fmt;
+
+       return 0;
+}
+
+int dynamic_trace_event_reg(struct trace_event_call *call,
+                           enum trace_reg type, void *data)
+{
+       struct trace_event_file *file = data;
+
+       WARN_ON(!(call->flags & TRACE_EVENT_FL_TRACEPOINT));
+       switch (type) {
+       case TRACE_REG_REGISTER:
+               return dynamic_tracepoint_probe_register(call->tp,
+                                                        call->class->probe,
+                                                        file);
+       case TRACE_REG_UNREGISTER:
+               tracepoint_probe_unregister(call->tp,
+                                           call->class->probe,
+                                           file, true);
+               return 0;
+
+#ifdef CONFIG_PERF_EVENTS
+       case TRACE_REG_PERF_REGISTER:
+               return dynamic_tracepoint_probe_register(call->tp,
+                                                        
call->class->perf_probe,
+                                                        call);
+       case TRACE_REG_PERF_UNREGISTER:
+               tracepoint_probe_unregister(call->tp,
+                                           call->class->perf_probe,
+                                           call, true);
+               return 0;
+       case TRACE_REG_PERF_OPEN:
+       case TRACE_REG_PERF_CLOSE:
+       case TRACE_REG_PERF_ADD:
+       case TRACE_REG_PERF_DEL:
+               return 0;
+#endif
+       }
+       return 0;
+}
+
+static void free_synth_field(struct synth_field *field)
+{
+       kfree(field->type);
+       kfree(field->name);
+       kfree(field);
+}
+
+static bool synth_field_signed(char *type)
+{
+       if (strncmp(type, "u", 1) == 0)
+               return false;
+
+       return true;
+}
+
+static unsigned int synth_field_size(char *type)
+{
+       unsigned int size = 0;
+
+       if (strcmp(type, "s64") == 0)
+               size = sizeof(s64);
+       else if (strcmp(type, "u64") == 0)
+               size = sizeof(u64);
+       else if (strcmp(type, "s32") == 0)
+               size = sizeof(s32);
+       else if (strcmp(type, "u32") == 0)
+               size = sizeof(u32);
+       else if (strcmp(type, "s16") == 0)
+               size = sizeof(s16);
+       else if (strcmp(type, "u16") == 0)
+               size = sizeof(u16);
+       else if (strcmp(type, "s8") == 0)
+               size = sizeof(s8);
+       else if (strcmp(type, "u8") == 0)
+               size = sizeof(u8);
+       else if (strcmp(type, "char") == 0)
+               size = sizeof(char);
+       else if (strcmp(type, "unsigned char") == 0)
+               size = sizeof(unsigned char);
+       else if (strcmp(type, "int") == 0)
+               size = sizeof(int);
+       else if (strcmp(type, "unsigned int") == 0)
+               size = sizeof(unsigned int);
+       else if (strcmp(type, "long") == 0)
+               size = sizeof(long);
+       else if (strcmp(type, "unsigned long") == 0)
+               size = sizeof(unsigned long);
+       else if (strcmp(type, "pid_t") == 0)
+               size = sizeof(pid_t);
+       else if (strstr(type, "[") == 0)
+               size = sizeof(u64);
+
+       return size;
+}
+
+static struct synth_field *parse_synth_field(char *field_type,
+                                            char *field_name)
+{
+       struct synth_field *field;
+       int len, ret = 0;
+       char *array;
+
+       if (field_type[0] == ';')
+               field_type++;
+
+       len = strlen(field_name);
+       if (field_name[len - 1] == ';')
+               field_name[len - 1] = '\0';
+
+       field = kzalloc(sizeof(*field), GFP_KERNEL);
+       if (!field)
+               return ERR_PTR(-ENOMEM);
+
+       len = strlen(field_type) + 1;
+       array = strchr(field_name, '[');
+       if (array)
+               len += strlen(array);
+       field->type = kzalloc(len, GFP_KERNEL);
+       if (!field->type) {
+               ret = -ENOMEM;
+               goto free;
+       }
+       strcat(field->type, field_type);
+       if (array)
+               strcat(field->type, array);
+
+       field->size = synth_field_size(field->type);
+       if (!field->size) {
+               ret = -EINVAL;
+               goto free;
+       }
+
+       field->is_signed = synth_field_signed(field->type);
+
+       field->name = kstrdup(field_name, GFP_KERNEL);
+       if (!field->name) {
+               ret = -ENOMEM;
+               goto free;
+       }
+ out:
+       return field;
+ free:
+       free_synth_field(field);
+       field = ERR_PTR(ret);
+       goto out;
+}
+
+static void free_synth_tracepoint(struct tracepoint *tp)
+{
+       if (!tp)
+               return;
+
+       kfree(tp->name);
+       kfree(tp);
+}
+
+static struct tracepoint *alloc_synth_tracepoint(char *name)
+{
+       struct tracepoint *tp;
+       int ret = 0;
+
+       tp = kzalloc(sizeof(*tp), GFP_KERNEL);
+       if (!tp) {
+               ret = -ENOMEM;
+               goto free;
+       }
+
+       tp->name = kstrdup(name, GFP_KERNEL);
+       if (!tp->name) {
+               ret = -ENOMEM;
+               goto free;
+       }
+
+       return tp;
+ free:
+       free_synth_tracepoint(tp);
+
+       return ERR_PTR(ret);
+}
+
+static inline void trace_synth(struct synth_event *event, u64 *var_ref_vals,
+                              unsigned int var_ref_idx)
+{
+       struct tracepoint *tp = event->tp;
+
+       if (unlikely(atomic_read(&tp->key.enabled) > 0)) {
+               struct tracepoint_func *it_func_ptr;
+               void *it_func;
+               void *__data;
+
+               if (!(cpu_online(raw_smp_processor_id())))
+                       return;
+
+               it_func_ptr = rcu_dereference_sched((tp)->funcs);
+               if (it_func_ptr) {
+                       do {
+                               it_func = (it_func_ptr)->func;
+                               __data = (it_func_ptr)->data;
+                               ((void(*)(void *__data, u64 *var_ref_vals, 
unsigned int var_ref_idx))(it_func))(__data, var_ref_vals, var_ref_idx);
+                       } while ((++it_func_ptr)->func);
+               }
+       }
+}
+
+static struct synth_event *find_synth_event(const char *name)
+{
+       struct synth_event *event;
+
+       list_for_each_entry(event, &synth_event_list, list) {
+               if (strcmp(event->name, name) == 0)
+                       return event;
+       }
+
+       return NULL;
+}
+
+static int register_synth_event(struct synth_event *event)
+{
+       struct trace_event_call *call = &event->call;
+       int ret = 0;
+
+       event->call.class = &event->class;
+       event->class.system = kstrdup(SYNTH_SYSTEM, GFP_KERNEL);
+       if (!event->class.system) {
+               ret = -ENOMEM;
+               goto out;
+       }
+
+       event->tp = alloc_synth_tracepoint(event->name);
+       if (IS_ERR(event->tp)) {
+               ret = PTR_ERR(event->tp);
+               event->tp = NULL;
+               goto out;
+       }
+
+       INIT_LIST_HEAD(&call->class->fields);
+       call->event.funcs = &synth_event_funcs;
+       call->class->define_fields = synth_event_define_fields;
+
+       ret = register_trace_event(&call->event);
+       if (!ret) {
+               ret = -ENODEV;
+               goto out;
+       }
+       call->flags = TRACE_EVENT_FL_TRACEPOINT;
+       call->class->reg = dynamic_trace_event_reg;
+       call->class->probe = trace_event_raw_event_synth;
+       call->data = event;
+       call->tp = event->tp;
+       ret = trace_add_event_call(call);
+       if (ret) {
+               pr_warn("Failed to register synthetic event: %s\n",
+                       trace_event_name(call));
+               goto err;
+       }
+
+       ret = set_synth_event_print_fmt(call);
+       if (ret < 0) {
+               trace_remove_event_call(call);
+               goto err;
+       }
+ out:
+       return ret;
+ err:
+       unregister_trace_event(&call->event);
+       goto out;
+}
+
+static int unregister_synth_event(struct synth_event *event)
+{
+       struct trace_event_call *call = &event->call;
+       int ret;
+
+       ret = trace_remove_event_call(call);
+       if (ret) {
+               pr_warn("Failed to remove synthetic event: %s\n",
+                       trace_event_name(call));
+               free_synth_event_print_fmt(call);
+               unregister_trace_event(&call->event);
+       }
+
+       return ret;
+}
+
+static void remove_synth_event(struct synth_event *event)
+{
+       unregister_synth_event(event);
+       list_del(&event->list);
+}
+
+static int add_synth_event(struct synth_event *event)
+{
+       int ret;
+
+       ret = register_synth_event(event);
+       if (ret)
+               return ret;
+
+       list_add(&event->list, &synth_event_list);
+
+       return 0;
+}
+
+static void free_synth_event(struct synth_event *event)
+{
+       unsigned int i;
+
+       if (!event)
+               return;
+
+       for (i = 0; i < event->n_fields; i++)
+               free_synth_field(event->fields[i]);
+
+       kfree(event->fields);
+       kfree(event->name);
+       kfree(event->class.system);
+       free_synth_tracepoint(event->tp);
+       free_synth_event_print_fmt(&event->call);
+       kfree(event);
+}
+
+static struct synth_event *alloc_synth_event(char *event_name, int n_fields,
+                                            struct synth_field **fields)
+{
+       struct synth_event *event;
+       unsigned int i;
+
+       event = kzalloc(sizeof(*event), GFP_KERNEL);
+       if (!event) {
+               event = ERR_PTR(-ENOMEM);
+               goto out;
+       }
+
+       event->name = kstrdup(event_name, GFP_KERNEL);
+       if (!event->name) {
+               kfree(event);
+               event = ERR_PTR(-ENOMEM);
+               goto out;
+       }
+
+       event->fields = kcalloc(n_fields, sizeof(event->fields), GFP_KERNEL);
+       if (!event->fields) {
+               free_synth_event(event);
+               event = ERR_PTR(-ENOMEM);
+               goto out;
+       }
+
+       for (i = 0; i < n_fields; i++)
+               event->fields[i] = fields[i];
+
+       event->n_fields = n_fields;
+ out:
+       return event;
+}
+
+static int create_synth_event(int argc, char **argv)
+{
+       struct synth_field *fields[SYNTH_FIELDS_MAX];
+       struct synth_event *event = NULL;
+       bool delete_event = false;
+       int i, n_fields = 0, ret = 0;
+       char *name;
+
+       mutex_lock(&synth_event_mutex);
+
+       /*
+        * Argument syntax:
+        *  - Add synthetic event: <event_name> field[;field] ...
+        *  - Remove synthetic event: !<event_name> field[;field] ...
+        *      where 'field' = type field_name
+        */
+       if (argc < 1) {
+               ret = -EINVAL;
+               goto err;
+       }
+
+       name = argv[0];
+       if (name[0] == '!') {
+               delete_event = true;
+               name++;
+       }
+
+       event = find_synth_event(name);
+       if (event) {
+               if (delete_event) {
+                       remove_synth_event(event);
+                       goto err;
+               } else
+                       ret = -EEXIST;
+               goto out;
+       } else if (delete_event) {
+               ret = -EINVAL;
+               goto out;
+       }
+
+       if (argc < 2) {
+               ret = -EINVAL;
+               goto err;
+       }
+
+       for (i = 1; i < argc - 1; i++) {
+               if (strcmp(argv[i], ";") == 0)
+                       continue;
+               if (n_fields == SYNTH_FIELDS_MAX) {
+                       ret = -EINVAL;
+                       goto out;
+               }
+               fields[n_fields] = parse_synth_field(argv[i], argv[i + 1]);
+               if (!fields[n_fields])
+                       goto err;
+               i++; n_fields++;
+       }
+       if (i < argc) {
+               ret = -EINVAL;
+               goto out;
+       }
+
+       event = alloc_synth_event(name, n_fields, fields);
+       if (IS_ERR(event)) {
+               ret = PTR_ERR(event);
+               event = NULL;
+               goto err;
+       }
+
+       add_synth_event(event);
+ out:
+       mutex_unlock(&synth_event_mutex);
+
+       return ret;
+ err:
+       for (i = 0; i < n_fields; i++)
+               free_synth_field(fields[i]);
+       free_synth_event(event);
+
+       goto out;
+}
+
+static int release_all_synth_events(void)
+{
+       struct synth_event *event, *e;
+       int ret = 0;
+
+       mutex_lock(&synth_event_mutex);
+
+       list_for_each_entry_safe(event, e, &synth_event_list, list) {
+               remove_synth_event(event);
+               free_synth_event(event);
+       }
+
+       mutex_unlock(&synth_event_mutex);
+
+       return ret;
+}
+
+
+static void *synth_events_seq_start(struct seq_file *m, loff_t *pos)
+{
+       mutex_lock(&synth_event_mutex);
+
+       return seq_list_start(&synth_event_list, *pos);
+}
+
+static void *synth_events_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+       return seq_list_next(v, &synth_event_list, pos);
+}
+
+static void synth_events_seq_stop(struct seq_file *m, void *v)
+{
+       mutex_unlock(&synth_event_mutex);
+}
+
+static int synth_events_seq_show(struct seq_file *m, void *v)
+{
+       struct synth_field *field;
+       struct synth_event *event = v;
+       unsigned int i;
+
+       seq_printf(m, "%s\t", event->name);
+
+       for (i = 0; i < event->n_fields; i++) {
+               field = event->fields[i];
+
+               /* parameter values */
+               seq_printf(m, "%s %s%s", field->type, field->name,
+                          i == event->n_fields - 1 ? "" : "; ");
+       }
+
+       seq_putc(m, '\n');
+
+       return 0;
+}
+
+static const struct seq_operations synth_events_seq_op = {
+       .start  = synth_events_seq_start,
+       .next   = synth_events_seq_next,
+       .stop   = synth_events_seq_stop,
+       .show   = synth_events_seq_show
+};
+
+static int synth_events_open(struct inode *inode, struct file *file)
+{
+       int ret;
+
+       if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
+               ret = release_all_synth_events();
+               if (ret < 0)
+                       return ret;
+       }
+
+       return seq_open(file, &synth_events_seq_op);
+}
+
+static ssize_t synth_events_write(struct file *file,
+                                 const char __user *buffer,
+                                 size_t count, loff_t *ppos)
+{
+       return trace_parse_run_command(file, buffer, count, ppos,
+                                      create_synth_event);
+}
+
+static const struct file_operations synth_events_fops = {
+       .open           = synth_events_open,
+       .write          = synth_events_write,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = seq_release,
+};
+
 static u64 hist_field_timestamp(struct hist_field *hist_field,
                                struct tracing_map_elt *elt,
                                struct ring_buffer_event *rbe,
@@ -3028,3 +3731,38 @@ __init int 
register_trigger_hist_enable_disable_cmds(void)
 
        return ret;
 }
+
+static __init int trace_events_hist_init(void)
+{
+       struct dentry *entry = NULL;
+       struct trace_array *tr;
+       struct dentry *d_tracer;
+       int err = 0;
+
+       tr = top_trace_array();
+       if (!tr) {
+               err = -ENODEV;
+               goto err;
+       }
+
+       d_tracer = tracing_init_dentry();
+       if (IS_ERR(d_tracer)) {
+               err = PTR_ERR(d_tracer);
+               goto err;
+       }
+
+       entry = tracefs_create_file("synthetic_events", 0644, d_tracer,
+                                   tr, &synth_events_fops);
+       if (!entry) {
+               err = -ENODEV;
+               goto err;
+       }
+
+       return err;
+ err:
+       pr_warn("Could not create tracefs 'synthetic_events' entry\n");
+
+       return err;
+}
+
+fs_initcall(trace_events_hist_init);
-- 
1.9.3

Reply via email to