Allow reading the trace file for trace remotes. This performs a
non-consuming read of the trace buffer.

Signed-off-by: Vincent Donnefort <[email protected]>

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 1fcbdea992d2..d6eee96a6789 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -4605,7 +4605,7 @@ static int s_show(struct seq_file *m, void *v)
  * Should be used after trace_array_get(), trace_types_lock
  * ensures that i_cdev was already initialized.
  */
-static inline int tracing_get_cpu(struct inode *inode)
+int tracing_get_cpu(struct inode *inode)
 {
        if (inode->i_cdev) /* See trace_create_cpu_file() */
                return (long)inode->i_cdev - 1;
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index fdbcd068ba38..aa397acb6be9 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -686,6 +686,7 @@ struct dentry *trace_create_cpu_file(const char *name,
                                     void *data,
                                     long cpu,
                                     const struct file_operations *fops);
+int tracing_get_cpu(struct inode *inode);
 
 
 /**
diff --git a/kernel/trace/trace_remote.c b/kernel/trace/trace_remote.c
index 82060bb19ba1..e03fde73aac7 100644
--- a/kernel/trace/trace_remote.c
+++ b/kernel/trace/trace_remote.c
@@ -18,14 +18,23 @@
 #define TRACEFS_MODE_WRITE     0640
 #define TRACEFS_MODE_READ      0440
 
+enum tri_type {
+       TRI_CONSUMING,
+       TRI_NONCONSUMING,
+};
+
 struct trace_remote_iterator {
        struct trace_remote             *remote;
        struct trace_seq                seq;
        struct delayed_work             poll_work;
        unsigned long                   lost_events;
        u64                             ts;
+       struct ring_buffer_iter         *rb_iter;
+       struct ring_buffer_iter         **rb_iters;
        int                             cpu;
        int                             evt_cpu;
+       loff_t                          pos;
+       enum tri_type                   type;
 };
 
 struct trace_remote {
@@ -36,6 +45,8 @@ struct trace_remote {
        unsigned long                   trace_buffer_size;
        struct ring_buffer_remote       rb_remote;
        struct mutex                    lock;
+       struct rw_semaphore             reader_lock;
+       struct rw_semaphore             *pcpu_reader_locks;
        unsigned int                    nr_readers;
        unsigned int                    poll_ms;
        bool                            tracing_on;
@@ -230,6 +241,20 @@ static int trace_remote_get(struct trace_remote *remote, 
int cpu)
        if (ret)
                return ret;
 
+       if (cpu != RING_BUFFER_ALL_CPUS && !remote->pcpu_reader_locks) {
+               int lock_cpu;
+
+               remote->pcpu_reader_locks = kcalloc(nr_cpu_ids, 
sizeof(*remote->pcpu_reader_locks),
+                                                   GFP_KERNEL);
+               if (!remote->pcpu_reader_locks) {
+                       trace_remote_try_unload(remote);
+                       return -ENOMEM;
+               }
+
+               for_each_possible_cpu(lock_cpu)
+                       init_rwsem(&remote->pcpu_reader_locks[lock_cpu]);
+       }
+
        remote->nr_readers++;
 
        return 0;
@@ -244,6 +269,9 @@ static void trace_remote_put(struct trace_remote *remote)
        if (remote->nr_readers)
                return;
 
+       kfree(remote->pcpu_reader_locks);
+       remote->pcpu_reader_locks = NULL;
+
        trace_remote_try_unload(remote);
 }
 
@@ -258,13 +286,58 @@ static void __poll_remote(struct work_struct *work)
                              msecs_to_jiffies(iter->remote->poll_ms));
 }
 
-static struct trace_remote_iterator *trace_remote_iter(struct trace_remote 
*remote, int cpu)
+static void __free_ring_buffer_iter(struct trace_remote_iterator *iter, int 
cpu)
+{
+       if (!iter->rb_iter)
+               return;
+
+       if (cpu != RING_BUFFER_ALL_CPUS) {
+               ring_buffer_read_finish(iter->rb_iter);
+               return;
+       }
+
+       for_each_possible_cpu(cpu) {
+               if (iter->rb_iters[cpu])
+                       ring_buffer_read_finish(iter->rb_iters[cpu]);
+       }
+
+       kfree(iter->rb_iters);
+}
+
+static int __alloc_ring_buffer_iter(struct trace_remote_iterator *iter, int 
cpu)
+{
+       if (cpu != RING_BUFFER_ALL_CPUS) {
+               iter->rb_iter = 
ring_buffer_read_start(iter->remote->trace_buffer, cpu, GFP_KERNEL);
+
+               return iter->rb_iter ? 0 : -ENOMEM;
+       }
+
+       iter->rb_iters = kcalloc(nr_cpu_ids, sizeof(*iter->rb_iters), 
GFP_KERNEL);
+       if (!iter->rb_iters)
+               return -ENOMEM;
+
+       for_each_possible_cpu(cpu) {
+               iter->rb_iters[cpu] = 
ring_buffer_read_start(iter->remote->trace_buffer, cpu,
+                                                            GFP_KERNEL);
+               if (!iter->rb_iters[cpu]) {
+                       __free_ring_buffer_iter(iter, RING_BUFFER_ALL_CPUS);
+                       return -ENOMEM;
+               }
+       }
+
+       return 0;
+}
+
+static struct trace_remote_iterator
+*trace_remote_iter(struct trace_remote *remote, int cpu, enum tri_type type)
 {
        struct trace_remote_iterator *iter = NULL;
        int ret;
 
        lockdep_assert_held(&remote->lock);
 
+       if (type == TRI_NONCONSUMING && !trace_remote_loaded(remote))
+               return NULL;
 
        ret = trace_remote_get(remote, cpu);
        if (ret)
@@ -279,9 +352,21 @@ static struct trace_remote_iterator 
*trace_remote_iter(struct trace_remote *remo
        if (iter) {
                iter->remote = remote;
                iter->cpu = cpu;
+               iter->type = type;
                trace_seq_init(&iter->seq);
-               INIT_DELAYED_WORK(&iter->poll_work, __poll_remote);
-               schedule_delayed_work(&iter->poll_work, 
msecs_to_jiffies(remote->poll_ms));
+
+               switch (type) {
+               case TRI_CONSUMING:
+                       INIT_DELAYED_WORK(&iter->poll_work, __poll_remote);
+                       schedule_delayed_work(&iter->poll_work, 
msecs_to_jiffies(remote->poll_ms));
+                       break;
+               case TRI_NONCONSUMING:
+                       ret = __alloc_ring_buffer_iter(iter, cpu);
+                       break;
+               }
+
+               if (ret)
+                       goto err;
 
                return iter;
        }
@@ -305,10 +390,100 @@ static void trace_remote_iter_free(struct 
trace_remote_iterator *iter)
 
        lockdep_assert_held(&remote->lock);
 
+       switch (iter->type) {
+       case TRI_CONSUMING:
+               cancel_delayed_work_sync(&iter->poll_work);
+               break;
+       case TRI_NONCONSUMING:
+               __free_ring_buffer_iter(iter, iter->cpu);
+               break;
+       }
+
        kfree(iter);
        trace_remote_put(remote);
 }
 
+static void trace_remote_iter_read_start(struct trace_remote_iterator *iter)
+{
+       struct trace_remote *remote = iter->remote;
+       int cpu = iter->cpu;
+
+       /* Acquire global reader lock */
+       if (cpu == RING_BUFFER_ALL_CPUS && iter->type == TRI_CONSUMING)
+               down_write(&remote->reader_lock);
+       else
+               down_read(&remote->reader_lock);
+
+       if (cpu == RING_BUFFER_ALL_CPUS)
+               return;
+
+       /*
+        * No need for the remote lock here, iter holds a reference on
+        * remote->nr_readers
+        */
+
+       /* Get the per-CPU one */
+       if (WARN_ON_ONCE(!remote->pcpu_reader_locks))
+               return;
+
+       if (iter->type == TRI_CONSUMING)
+               down_write(&remote->pcpu_reader_locks[cpu]);
+       else
+               down_read(&remote->pcpu_reader_locks[cpu]);
+}
+
+static void trace_remote_iter_read_finished(struct trace_remote_iterator *iter)
+{
+       struct trace_remote *remote = iter->remote;
+       int cpu = iter->cpu;
+
+       /* Release per-CPU reader lock */
+       if (cpu != RING_BUFFER_ALL_CPUS) {
+               /*
+                * No need for the remote lock here, iter holds a reference on
+                * remote->nr_readers
+                */
+               if (iter->type == TRI_CONSUMING)
+                       up_write(&remote->pcpu_reader_locks[cpu]);
+               else
+                       up_read(&remote->pcpu_reader_locks[cpu]);
+       }
+
+       /* Release global reader lock */
+       if (cpu == RING_BUFFER_ALL_CPUS && iter->type == TRI_CONSUMING)
+               up_write(&remote->reader_lock);
+       else
+               up_read(&remote->reader_lock);
+}
+
+static struct ring_buffer_iter *__get_rb_iter(struct trace_remote_iterator 
*iter, int cpu)
+{
+       return iter->cpu != RING_BUFFER_ALL_CPUS ? iter->rb_iter : 
iter->rb_iters[cpu];
+}
+
+static struct ring_buffer_event *
+__peek_event(struct trace_remote_iterator *iter, int cpu, u64 *ts, unsigned 
long *lost_events)
+{
+       struct ring_buffer_event *rb_evt;
+       struct ring_buffer_iter *rb_iter;
+
+       switch (iter->type) {
+       case TRI_CONSUMING:
+               return ring_buffer_peek(iter->remote->trace_buffer, cpu, ts, 
lost_events);
+       case TRI_NONCONSUMING:
+               rb_iter = __get_rb_iter(iter, cpu);
+               rb_evt = ring_buffer_iter_peek(rb_iter, ts);
+               if (!rb_evt)
+                       return NULL;
+
+               *lost_events = ring_buffer_iter_dropped(rb_iter);
+
+               return rb_evt;
+       }
+
+       return NULL;
+}
+
 static bool trace_remote_iter_read_event(struct trace_remote_iterator *iter)
 {
        struct trace_buffer *trace_buffer = iter->remote->trace_buffer;
@@ -318,7 +493,7 @@ static bool trace_remote_iter_read_event(struct 
trace_remote_iterator *iter)
                if (ring_buffer_empty_cpu(trace_buffer, cpu))
                        return false;
 
-               if (!ring_buffer_peek(trace_buffer, cpu, &iter->ts, 
&iter->lost_events))
+               if (!__peek_event(iter, cpu, &iter->ts, &iter->lost_events))
                        return false;
 
                iter->evt_cpu = cpu;
@@ -333,7 +508,7 @@ static bool trace_remote_iter_read_event(struct 
trace_remote_iterator *iter)
                if (ring_buffer_empty_cpu(trace_buffer, cpu))
                        continue;
 
-               if (!ring_buffer_peek(trace_buffer, cpu, &ts, &lost_events))
+               if (!__peek_event(iter, cpu, &ts, &lost_events))
                        continue;
 
                if (ts >= iter->ts)
@@ -347,6 +522,20 @@ static bool trace_remote_iter_read_event(struct 
trace_remote_iterator *iter)
        return iter->ts != U64_MAX;
 }
 
+static void trace_remote_iter_move(struct trace_remote_iterator *iter)
+{
+       struct trace_buffer *trace_buffer = iter->remote->trace_buffer;
+
+       switch (iter->type) {
+       case TRI_CONSUMING:
+               ring_buffer_consume(trace_buffer, iter->evt_cpu, NULL, NULL);
+               break;
+       case TRI_NONCONSUMING:
+               ring_buffer_iter_advance(__get_rb_iter(iter, iter->evt_cpu));
+               break;
+       }
+}
+
 static int trace_remote_iter_print_event(struct trace_remote_iterator *iter)
 {
        unsigned long usecs_rem;
@@ -369,13 +558,14 @@ static int trace_pipe_open(struct inode *inode, struct 
file *filp)
 {
        struct trace_remote *remote = inode->i_private;
        struct trace_remote_iterator *iter;
-       int cpu = RING_BUFFER_ALL_CPUS;
-
-       if (inode->i_cdev)
-               cpu = (long)inode->i_cdev - 1;
+       int cpu = tracing_get_cpu(inode);
 
        guard(mutex)(&remote->lock);
-       iter = trace_remote_iter(remote, cpu);
+
+       iter = trace_remote_iter(remote, cpu, TRI_CONSUMING);
+       if (IS_ERR(iter))
+               return PTR_ERR(iter);
+
        filp->private_data = iter;
 
        return IS_ERR(iter) ? PTR_ERR(iter) : 0;
@@ -410,6 +600,8 @@ static ssize_t trace_pipe_read(struct file *filp, char 
__user *ubuf, size_t cnt,
        if (ret < 0)
                return ret;
 
+       trace_remote_iter_read_start(iter);
+
        while (trace_remote_iter_read_event(iter)) {
                int prev_len = iter->seq.seq.len;
 
@@ -418,9 +610,11 @@ static ssize_t trace_pipe_read(struct file *filp, char 
__user *ubuf, size_t cnt,
                        break;
                }
 
-               ring_buffer_consume(trace_buffer, iter->evt_cpu, NULL, NULL);
+               trace_remote_iter_move(iter);
        }
 
+       trace_remote_iter_read_finished(iter);
+
        goto copy_to_user;
 }
 
@@ -430,14 +624,123 @@ static const struct file_operations trace_pipe_fops = {
        .release        = trace_pipe_release,
 };
 
+static void *trace_next(struct seq_file *m, void *v, loff_t *pos)
+{
+       struct trace_remote_iterator *iter = m->private;
+
+       ++*pos;
+
+       if (!iter || !trace_remote_iter_read_event(iter))
+               return NULL;
+
+       trace_remote_iter_move(iter);
+       iter->pos++;
+
+       return iter;
+}
+
+static void *trace_start(struct seq_file *m, loff_t *pos)
+{
+       struct trace_remote_iterator *iter = m->private;
+       loff_t i;
+
+       if (!iter)
+               return NULL;
+
+       if (!*pos) {
+               iter->pos = -1;
+               return trace_next(m, NULL, &i);
+       }
+
+       i = iter->pos;
+       while (i < *pos) {
+               iter = trace_next(m, NULL, &i);
+               if (!iter)
+                       return NULL;
+       }
+
+       return iter;
+}
+
+static int trace_show(struct seq_file *m, void *v)
+{
+       struct trace_remote_iterator *iter = v;
+
+       trace_seq_init(&iter->seq);
+
+       if (trace_remote_iter_print_event(iter)) {
+               seq_printf(m, "[EVENT %d PRINT TOO BIG]\n", iter->evt->id);
+               return 0;
+       }
+
+       return trace_print_seq(m, &iter->seq);
+}
+
+static void trace_stop(struct seq_file *s, void *v) { }
+
+static const struct seq_operations trace_sops = {
+       .start          = trace_start,
+       .next           = trace_next,
+       .show           = trace_show,
+       .stop           = trace_stop,
+};
+
+static int trace_open(struct inode *inode, struct file *filp)
+{
+       struct trace_remote *remote = inode->i_private;
+       struct trace_remote_iterator *iter = NULL;
+       int cpu = tracing_get_cpu(inode);
+       int ret;
+
+       if (!(filp->f_mode & FMODE_READ))
+               return 0;
+
+       guard(mutex)(&remote->lock);
+
+       iter = trace_remote_iter(remote, cpu, TRI_NONCONSUMING);
+       if (IS_ERR(iter))
+               return PTR_ERR(iter);
+
+       ret = seq_open(filp, &trace_sops);
+       if (ret) {
+               trace_remote_iter_free(iter);
+               return ret;
+       }
+
+       if (iter)
+               trace_remote_iter_read_start(iter);
+
+       ((struct seq_file *)filp->private_data)->private = (void *)iter;
+
+       return 0;
+}
+
+static int trace_release(struct inode *inode, struct file *filp)
+{
+       struct trace_remote_iterator *iter;
+
+       if (!(filp->f_mode & FMODE_READ))
+               return 0;
+
+       iter = ((struct seq_file *)filp->private_data)->private;
+       seq_release(inode, filp);
+
+       if (!iter)
+               return 0;
+
+       guard(mutex)(&iter->remote->lock);
+
+       trace_remote_iter_read_finished(iter);
+       trace_remote_iter_free(iter);
+
+       return 0;
+}
+
 static ssize_t trace_write(struct file *filp, const char __user *ubuf, size_t 
cnt, loff_t *ppos)
 {
        struct inode *inode = file_inode(filp);
        struct trace_remote *remote = inode->i_private;
-       int cpu = RING_BUFFER_ALL_CPUS;
-
-       if (inode->i_cdev)
-               cpu = (long)inode->i_cdev - 1;
+       int cpu = tracing_get_cpu(inode);
 
        guard(mutex)(&remote->lock);
 
@@ -447,7 +750,11 @@ static ssize_t trace_write(struct file *filp, const char 
__user *ubuf, size_t cn
 }
 
 static const struct file_operations trace_fops = {
+       .open           = trace_open,
        .write          = trace_write,
+       .read           = seq_read,
+       .read_iter      = seq_read_iter,
+       .release        = trace_release,
 };
 
 static int trace_remote_init_tracefs(const char *name, struct trace_remote 
*remote)
@@ -566,6 +873,7 @@ int trace_remote_register(const char *name, struct 
trace_remote_callbacks *cbs,
        remote->trace_buffer_size = 7 << 10;
        remote->poll_ms = 100;
        mutex_init(&remote->lock);
+       init_rwsem(&remote->reader_lock);
 
        if (trace_remote_init_tracefs(name, remote)) {
                kfree(remote);
-- 
2.53.0.rc1.225.gd81095ad13-goog


Reply via email to