Introduce a new tunable nested_precise_ts that allows precise time
stamps to be recorded for nested trace writes. 

The tunable is: /sys/kernel/debug/tracing/nested_precise_ts. By default,
it is zero and the behavior is disabled. Set the tunable to 1 in order
to get accurate timestamps.

Signed-off-by: Suresh Warrier <warr...@linux.vnet.ibm.com>
---
 Documentation/trace/ftrace.txt |  8 +++++++
 include/linux/ring_buffer.h    |  9 ++++++++
 kernel/trace/ring_buffer.c     | 14 ++++++++++++
 kernel/trace/trace.c           | 51 ++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 82 insertions(+)

diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt
index bfe8c29..4d1e83b 100644
--- a/Documentation/trace/ftrace.txt
+++ b/Documentation/trace/ftrace.txt
@@ -264,6 +264,14 @@ of ftrace. Here is a list of some of the key files:
        one will show only the first kernel function that is called
        from user space.

+  nested_precise_ts:
+
+       When set, it will generate precise timestamps even for nested
+       writers which otherwise usually all get the same timestamp as
+       the initial preempted writer. However, this will require briefly
+       disabling interrupts on the local CPU for each trace event, which
+       can be expensive on some architectures.
+
   printk_formats:

        This is for tools that read the raw format files. If an event in
diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index 49a4d6f..682147a 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -5,6 +5,7 @@
 #include <linux/mm.h>
 #include <linux/seq_file.h>
 #include <linux/poll.h>
+#include <linux/jump_label.h>

 struct ring_buffer;
 struct ring_buffer_iter;
@@ -97,6 +98,14 @@ __ring_buffer_alloc(unsigned long size, unsigned flags, 
struct lock_class_key *k
        __ring_buffer_alloc((size), (flags), &__key);   \
 })

+extern struct static_key __precise_nested_write_ts;
+static inline bool rb_precise_nested_write_ts(void)
+{
+       return static_key_false(&__precise_nested_write_ts);
+}
+void rb_enable_precise_nested_write_ts(void);
+void rb_disable_precise_nested_write_ts(void);
+
 int ring_buffer_wait(struct ring_buffer *buffer, int cpu);
 int ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu,
                          struct file *filp, poll_table *poll_table);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 3d9fee3..c9b3005 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -2520,6 +2520,20 @@ static inline void rb_end_commit(struct 
ring_buffer_per_cpu *cpu_buffer)
        }
 }

+struct static_key __precise_nested_write_ts;
+
+void rb_enable_precise_nested_write_ts(void)
+{
+       if (!rb_precise_nested_write_ts())
+               static_key_slow_inc(&__precise_nested_write_ts);
+}
+
+void rb_disable_precise_nested_write_ts(void)
+{
+       if (rb_precise_nested_write_ts())
+               static_key_slow_dec(&__precise_nested_write_ts);
+}
+
 static struct ring_buffer_event *
 rb_reserve_next_event(struct ring_buffer *buffer,
                      struct ring_buffer_per_cpu *cpu_buffer,
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 18cdf91..f27dcde 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3486,6 +3486,7 @@ static const char readme_msg[] =
        "\t\t\t  Remove sub-buffer with rmdir\n"
        "  trace_options\t\t- Set format or modify how tracing happens\n"
        "\t\t\t  Disable an option by adding a suffix 'no' to the option name\n"
+       "  nested_precise_ts\t- Enable/disable precise timestamps for nested 
writes\n"
 #ifdef CONFIG_DYNAMIC_FTRACE
        "\n  available_filter_functions - list of functions that can be 
filtered on\n"
        "  set_ftrace_filter\t- echo function name in here to only trace these 
functions\n"
@@ -4680,6 +4681,45 @@ static int tracing_clock_open(struct inode *inode, 
struct file *file)
        return ret;
 }

+static ssize_t
+tracing_nested_precise_read(struct file *filp, char __user *ubuf,
+              size_t cnt, loff_t *ppos)
+{
+       char buf[64];
+       int r;
+
+       r = rb_precise_nested_write_ts() ? 1 : 0;
+       r = sprintf(buf, "%d\n", r);
+
+       return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t
+tracing_nested_precise_write(struct file *filp, const char __user *ubuf,
+               size_t cnt, loff_t *ppos)
+{
+       struct trace_array *tr = filp->private_data;
+       struct ring_buffer *buffer = tr->trace_buffer.buffer;
+       unsigned long val;
+       int ret;
+
+       ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
+       if (ret)
+               return ret;
+
+       if (buffer) {
+               mutex_lock(&trace_types_lock);
+               if (val)
+                       rb_enable_precise_nested_write_ts();
+               else
+                       rb_disable_precise_nested_write_ts();
+               mutex_unlock(&trace_types_lock);
+       }
+
+       (*ppos)++;
+
+       return cnt;
+}
 struct ftrace_buffer_info {
        struct trace_iterator   iter;
        void                    *spare;
@@ -4910,6 +4950,14 @@ static const struct file_operations trace_clock_fops = {
        .write          = tracing_clock_write,
 };

+static const struct file_operations tracing_nested_precise_fops = {
+       .open           = tracing_open_generic_tr,
+       .read           = tracing_nested_precise_read,
+       .write          = tracing_nested_precise_write,
+       .llseek         = generic_file_llseek,
+       .release        = tracing_release_generic_tr,
+};
+
 #ifdef CONFIG_TRACER_SNAPSHOT
 static const struct file_operations snapshot_fops = {
        .open           = tracing_snapshot_open,
@@ -6160,6 +6208,9 @@ init_tracer_debugfs(struct trace_array *tr, struct dentry 
*d_tracer)
        trace_create_file("trace_clock", 0644, d_tracer, tr,
                          &trace_clock_fops);

+       trace_create_file("nested_precise_ts", 0644, d_tracer,
+                         tr, &tracing_nested_precise_fops);
+
        trace_create_file("tracing_on", 0644, d_tracer,
                          tr, &rb_simple_fops);

-- 1.8.3.4 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to