Introduce a new tunable, nested_precise_ts, that allows precise timestamps to be recorded for nested trace writes.
The tunable is: /sys/kernel/debug/tracing/nested_precise_ts. By default, it is zero and the behavior is disabled. Set the tunable to 1 in order to get accurate timestamps. Signed-off-by: Suresh Warrier <warr...@linux.vnet.ibm.com> --- Documentation/trace/ftrace.txt | 8 +++++++ include/linux/ring_buffer.h | 9 ++++++++ kernel/trace/ring_buffer.c | 14 ++++++++++++ kernel/trace/trace.c | 51 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 82 insertions(+) diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt index bfe8c29..4d1e83b 100644 --- a/Documentation/trace/ftrace.txt +++ b/Documentation/trace/ftrace.txt @@ -264,6 +264,14 @@ of ftrace. Here is a list of some of the key files: one will show only the first kernel function that is called from user space. + nested_precise_ts: + + When set, it will generate precise timestamps even for nested + writers which otherwise usually all get the same timestamp as + the initial preempted writer. However, this will require briefly + disabling interrupts on the local CPU for each trace event, which + can be expensive on some architectures. + printk_formats: This is for tools that read the raw format files. 
If an event in diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 49a4d6f..682147a 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -5,6 +5,7 @@ #include <linux/mm.h> #include <linux/seq_file.h> #include <linux/poll.h> +#include <linux/jump_label.h> struct ring_buffer; struct ring_buffer_iter; @@ -97,6 +98,14 @@ __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *k __ring_buffer_alloc((size), (flags), &__key); \ }) +extern struct static_key __precise_nested_write_ts; +static inline bool rb_precise_nested_write_ts(void) +{ + return static_key_false(&__precise_nested_write_ts); +} +void rb_enable_precise_nested_write_ts(void); +void rb_disable_precise_nested_write_ts(void); + int ring_buffer_wait(struct ring_buffer *buffer, int cpu); int ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu, struct file *filp, poll_table *poll_table); diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 3d9fee3..c9b3005 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2520,6 +2520,20 @@ static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer) } } +struct static_key __precise_nested_write_ts; + +void rb_enable_precise_nested_write_ts(void) +{ + if (!rb_precise_nested_write_ts()) + static_key_slow_inc(&__precise_nested_write_ts); +} + +void rb_disable_precise_nested_write_ts(void) +{ + if (rb_precise_nested_write_ts()) + static_key_slow_dec(&__precise_nested_write_ts); +} + static struct ring_buffer_event * rb_reserve_next_event(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer, diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 18cdf91..f27dcde 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3486,6 +3486,7 @@ static const char readme_msg[] = "\t\t\t Remove sub-buffer with rmdir\n" " trace_options\t\t- Set format or modify how tracing happens\n" "\t\t\t Disable an option by adding a suffix 
'no' to the option name\n" + " nested_precise_ts\t- Enable/disable precise timestamps for nested writes\n" #ifdef CONFIG_DYNAMIC_FTRACE "\n available_filter_functions - list of functions that can be filtered on\n" " set_ftrace_filter\t- echo function name in here to only trace these functions\n" @@ -4680,6 +4681,45 @@ static int tracing_clock_open(struct inode *inode, struct file *file) return ret; } +static ssize_t +tracing_nested_precise_read(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + char buf[64]; + int r; + + r = rb_precise_nested_write_ts() ? 1 : 0; + r = sprintf(buf, "%d\n", r); + + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); +} + +static ssize_t +tracing_nested_precise_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + struct trace_array *tr = filp->private_data; + struct ring_buffer *buffer = tr->trace_buffer.buffer; + unsigned long val; + int ret; + + ret = kstrtoul_from_user(ubuf, cnt, 10, &val); + if (ret) + return ret; + + if (buffer) { + mutex_lock(&trace_types_lock); + if (val) + rb_enable_precise_nested_write_ts(); + else + rb_disable_precise_nested_write_ts(); + mutex_unlock(&trace_types_lock); + } + + (*ppos)++; + + return cnt; +} struct ftrace_buffer_info { struct trace_iterator iter; void *spare; @@ -4910,6 +4950,14 @@ static const struct file_operations trace_clock_fops = { .write = tracing_clock_write, }; +static const struct file_operations tracing_nested_precise_fops = { + .open = tracing_open_generic_tr, + .read = tracing_nested_precise_read, + .write = tracing_nested_precise_write, + .llseek = generic_file_llseek, + .release = tracing_release_generic_tr, +}; + #ifdef CONFIG_TRACER_SNAPSHOT static const struct file_operations snapshot_fops = { .open = tracing_snapshot_open, @@ -6160,6 +6208,9 @@ init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer) trace_create_file("trace_clock", 0644, d_tracer, tr, &trace_clock_fops); + 
trace_create_file("nested_precise_ts", 0644, d_tracer, + tr, &tracing_nested_precise_fops); + trace_create_file("tracing_on", 0644, d_tracer, tr, &rb_simple_fops); -- 1.8.3.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/