On Sat, 31 Jan 2026 13:28:25 +0000
Vincent Donnefort <[email protected]> wrote:

> -static struct trace_remote_iterator *trace_remote_iter(struct trace_remote 
> *remote, int cpu)
> +static void __free_ring_buffer_iter(struct trace_remote_iterator *iter, int 
> cpu)
> +{
> +     if (!iter->rb_iter)
> +             return;

Hmm, can't iter->rb_iter be NULL when iter->rb_iters[] is used?

> +
> +     if (cpu != RING_BUFFER_ALL_CPUS) {
> +             ring_buffer_read_finish(iter->rb_iter);
> +             return;
> +     }
> +
> +     for_each_possible_cpu(cpu) {
> +             if (iter->rb_iters[cpu])
> +                     ring_buffer_read_finish(iter->rb_iters[cpu]);
> +     }
> +
> +     kfree(iter->rb_iters);
> +}
> +
> +static int __alloc_ring_buffer_iter(struct trace_remote_iterator *iter, int 
> cpu)
> +{
> +     if (cpu != RING_BUFFER_ALL_CPUS) {
> +             iter->rb_iter = 
> ring_buffer_read_start(iter->remote->trace_buffer, cpu, GFP_KERNEL);
> +
> +             return iter->rb_iter ? 0 : -ENOMEM;
> +     }
> +
> +     iter->rb_iters = kcalloc(nr_cpu_ids, sizeof(*iter->rb_iters), 
> GFP_KERNEL);
> +     if (!iter->rb_iters)
> +             return -ENOMEM;
> +
> +     for_each_possible_cpu(cpu) {
> +             iter->rb_iters[cpu] = 
> ring_buffer_read_start(iter->remote->trace_buffer, cpu,
> +                                                          GFP_KERNEL);
> +             if (!iter->rb_iters[cpu]) {
> +                     __free_ring_buffer_iter(iter, RING_BUFFER_ALL_CPUS);

For instance, we call __free_ring_buffer_iter() here, but I don't see
iter->rb_iter being set.

-- Steve


> +                     return -ENOMEM;
> +             }
> +     }
> +
> +     return 0;
> +}
> +
> +static struct trace_remote_iterator
> +*trace_remote_iter(struct trace_remote *remote, int cpu, enum tri_type type)
>  {
>       struct trace_remote_iterator *iter = NULL;
>       int ret;
>  
>       lockdep_assert_held(&remote->lock);
>  
> +     if (type == TRI_NONCONSUMING && !trace_remote_loaded(remote))
> +             return NULL;
>  
>       ret = trace_remote_get(remote, cpu);
>       if (ret)
> @@ -279,9 +352,21 @@ static struct trace_remote_iterator 
> *trace_remote_iter(struct trace_remote *remo
>       if (iter) {
>               iter->remote = remote;
>               iter->cpu = cpu;
> +             iter->type = type;
>               trace_seq_init(&iter->seq);
> -             INIT_DELAYED_WORK(&iter->poll_work, __poll_remote);
> -             schedule_delayed_work(&iter->poll_work, 
> msecs_to_jiffies(remote->poll_ms));
> +
> +             switch (type) {
> +             case TRI_CONSUMING:
> +                     INIT_DELAYED_WORK(&iter->poll_work, __poll_remote);
> +                     schedule_delayed_work(&iter->poll_work, 
> msecs_to_jiffies(remote->poll_ms));
> +                     break;
> +             case TRI_NONCONSUMING:
> +                     ret = __alloc_ring_buffer_iter(iter, cpu);
> +                     break;
> +             }
> +
> +             if (ret)
> +                     goto err;
>  
>               return iter;
>       }
> @@ -305,10 +390,100 @@ static void trace_remote_iter_free(struct 
> trace_remote_iterator *iter)
>  
>       lockdep_assert_held(&remote->lock);
>  
> +     switch (iter->type) {
> +     case TRI_CONSUMING:
> +             cancel_delayed_work_sync(&iter->poll_work);
> +             break;
> +     case TRI_NONCONSUMING:
> +             __free_ring_buffer_iter(iter, iter->cpu);
> +             break;
> +     }
> +
>       kfree(iter);
>       trace_remote_put(remote);
>  }
>  
> +static void trace_remote_iter_read_start(struct trace_remote_iterator *iter)
> +{
> +     struct trace_remote *remote = iter->remote;
> +     int cpu = iter->cpu;
> +
> +     /* Acquire global reader lock */
> +     if (cpu == RING_BUFFER_ALL_CPUS && iter->type == TRI_CONSUMING)
> +             down_write(&remote->reader_lock);
> +     else
> +             down_read(&remote->reader_lock);
> +
> +     if (cpu == RING_BUFFER_ALL_CPUS)
> +             return;
> +
> +     /*
> +      * No need for the remote lock here, iter holds a reference on
> +      * remote->nr_readers
> +      */
> +
> +     /* Get the per-CPU one */
> +     if (WARN_ON_ONCE(!remote->pcpu_reader_locks))
> +             return;
> +
> +     if (iter->type == TRI_CONSUMING)
> +             down_write(&remote->pcpu_reader_locks[cpu]);
> +     else
> +             down_read(&remote->pcpu_reader_locks[cpu]);
> +}
> +
> +static void trace_remote_iter_read_finished(struct trace_remote_iterator 
> *iter)
> +{
> +     struct trace_remote *remote = iter->remote;
> +     int cpu = iter->cpu;
> +
> +     /* Release per-CPU reader lock */
> +     if (cpu != RING_BUFFER_ALL_CPUS) {
> +             /*
> +              * No need for the remote lock here, iter holds a reference on
> +              * remote->nr_readers
> +              */
> +             if (iter->type == TRI_CONSUMING)
> +                     up_write(&remote->pcpu_reader_locks[cpu]);
> +             else
> +                     up_read(&remote->pcpu_reader_locks[cpu]);
> +     }
> +
> +     /* Release global reader lock */
> +     if (cpu == RING_BUFFER_ALL_CPUS && iter->type == TRI_CONSUMING)
> +             up_write(&remote->reader_lock);
> +     else
> +             up_read(&remote->reader_lock);
> +}
> +
> +static struct ring_buffer_iter *__get_rb_iter(struct trace_remote_iterator 
> *iter, int cpu)
> +{
> +     return iter->cpu != RING_BUFFER_ALL_CPUS ? iter->rb_iter : 
> iter->rb_iters[cpu];
> +}
> +
> +static struct ring_buffer_event *
> +__peek_event(struct trace_remote_iterator *iter, int cpu, u64 *ts, unsigned 
> long *lost_events)
> +{
> +     struct ring_buffer_event *rb_evt;
> +     struct ring_buffer_iter *rb_iter;
> +
> +     switch (iter->type) {
> +     case TRI_CONSUMING:
> +             return ring_buffer_peek(iter->remote->trace_buffer, cpu, ts, 
> lost_events);
> +     case TRI_NONCONSUMING:
> +             rb_iter = __get_rb_iter(iter, cpu);
> +             rb_evt = ring_buffer_iter_peek(rb_iter, ts);
> +             if (!rb_evt)
> +                     return NULL;
> +
> +             *lost_events = ring_buffer_iter_dropped(rb_iter);
> +
> +             return rb_evt;
> +     }
> +
> +     return NULL;
> +}
> +
>  static bool trace_remote_iter_read_event(struct trace_remote_iterator *iter)
>  {
>       struct trace_buffer *trace_buffer = iter->remote->trace_buffer;
> @@ -318,7 +493,7 @@ static bool trace_remote_iter_read_event(struct 
> trace_remote_iterator *iter)
>               if (ring_buffer_empty_cpu(trace_buffer, cpu))
>                       return false;
>  
> -             if (!ring_buffer_peek(trace_buffer, cpu, &iter->ts, 
> &iter->lost_events))
> +             if (!__peek_event(iter, cpu, &iter->ts, &iter->lost_events))
>                       return false;
>  
>               iter->evt_cpu = cpu;
> @@ -333,7 +508,7 @@ static bool trace_remote_iter_read_event(struct 
> trace_remote_iterator *iter)
>               if (ring_buffer_empty_cpu(trace_buffer, cpu))
>                       continue;
>  
> -             if (!ring_buffer_peek(trace_buffer, cpu, &ts, &lost_events))
> +             if (!__peek_event(iter, cpu, &ts, &lost_events))
>                       continue;
>  
>               if (ts >= iter->ts)
> @@ -347,6 +522,20 @@ static bool trace_remote_iter_read_event(struct 
> trace_remote_iterator *iter)
>       return iter->ts != U64_MAX;
>  }
>  
> +static void trace_remote_iter_move(struct trace_remote_iterator *iter)
> +{
> +     struct trace_buffer *trace_buffer = iter->remote->trace_buffer;
> +
> +     switch (iter->type) {
> +     case TRI_CONSUMING:
> +             ring_buffer_consume(trace_buffer, iter->evt_cpu, NULL, NULL);
> +             break;
> +     case TRI_NONCONSUMING:
> +             ring_buffer_iter_advance(__get_rb_iter(iter, iter->evt_cpu));
> +             break;
> +     }
> +}
> +
>  static int trace_remote_iter_print_event(struct trace_remote_iterator *iter)
>  {
>       unsigned long usecs_rem;
> @@ -369,13 +558,14 @@ static int trace_pipe_open(struct inode *inode, struct 
> file *filp)
>  {
>       struct trace_remote *remote = inode->i_private;
>       struct trace_remote_iterator *iter;
> -     int cpu = RING_BUFFER_ALL_CPUS;
> -
> -     if (inode->i_cdev)
> -             cpu = (long)inode->i_cdev - 1;
> +     int cpu = tracing_get_cpu(inode);
>  
>       guard(mutex)(&remote->lock);
> -     iter = trace_remote_iter(remote, cpu);
> +
> +     iter = trace_remote_iter(remote, cpu, TRI_CONSUMING);
> +     if (IS_ERR(iter))
> +             return PTR_ERR(iter);
> +
>       filp->private_data = iter;
>  
>       return IS_ERR(iter) ? PTR_ERR(iter) : 0;
> @@ -410,6 +600,8 @@ static ssize_t trace_pipe_read(struct file *filp, char 
> __user *ubuf, size_t cnt,
>       if (ret < 0)
>               return ret;
>  
> +     trace_remote_iter_read_start(iter);
> +
>       while (trace_remote_iter_read_event(iter)) {
>               int prev_len = iter->seq.seq.len;
>  
> @@ -418,9 +610,11 @@ static ssize_t trace_pipe_read(struct file *filp, char 
> __user *ubuf, size_t cnt,
>                       break;
>               }
>  
> -             ring_buffer_consume(trace_buffer, iter->evt_cpu, NULL, NULL);
> +             trace_remote_iter_move(iter);
>       }
>  
> +     trace_remote_iter_read_finished(iter);
> +
>       goto copy_to_user;
>  }
>  
> @@ -430,14 +624,123 @@ static const struct file_operations trace_pipe_fops = {
>       .release        = trace_pipe_release,
>  };
>  
> +static void *trace_next(struct seq_file *m, void *v, loff_t *pos)
> +{
> +     struct trace_remote_iterator *iter = m->private;
> +
> +     ++*pos;
> +
> +     if (!iter || !trace_remote_iter_read_event(iter))
> +             return NULL;
> +
> +     trace_remote_iter_move(iter);
> +     iter->pos++;
> +
> +     return iter;
> +}
> +
> +static void *trace_start(struct seq_file *m, loff_t *pos)
> +{
> +     struct trace_remote_iterator *iter = m->private;
> +     loff_t i;
> +

FYI, this is where you take locks for iteration of files.

> +     if (!iter)
> +             return NULL;
> +
> +     if (!*pos) {
> +             iter->pos = -1;
> +             return trace_next(m, NULL, &i);
> +     }
> +
> +     i = iter->pos;
> +     while (i < *pos) {
> +             iter = trace_next(m, NULL, &i);
> +             if (!iter)
> +                     return NULL;
> +     }
> +
> +     return iter;
> +}
> +
> +static int trace_show(struct seq_file *m, void *v)
> +{
> +     struct trace_remote_iterator *iter = v;
> +
> +     trace_seq_init(&iter->seq);
> +
> +     if (trace_remote_iter_print_event(iter)) {
> +             seq_printf(m, "[EVENT %d PRINT TOO BIG]\n", iter->evt->id);
> +             return 0;
> +     }
> +
> +     return trace_print_seq(m, &iter->seq);
> +}
> +
> +static void trace_stop(struct seq_file *s, void *v) { }

And stop is where you release the locks.

> +
> +static const struct seq_operations trace_sops = {
> +     .start          = trace_start,
> +     .next           = trace_next,
> +     .show           = trace_show,
> +     .stop           = trace_stop,
> +};
> +
> +static int trace_open(struct inode *inode, struct file *filp)
> +{
> +     struct trace_remote *remote = inode->i_private;
> +     struct trace_remote_iterator *iter = NULL;
> +     int cpu = tracing_get_cpu(inode);
> +     int ret;
> +
> +     if (!(filp->f_mode & FMODE_READ))
> +             return 0;
> +
> +     guard(mutex)(&remote->lock);
> +
> +     iter = trace_remote_iter(remote, cpu, TRI_NONCONSUMING);
> +     if (IS_ERR(iter))
> +             return PTR_ERR(iter);

So if iter is bad we exit out here.

> +
> +     ret = seq_open(filp, &trace_sops);
> +     if (ret) {
> +             trace_remote_iter_free(iter);
> +             return ret;
> +     }
> +
> +     if (iter)

Why test if iter exists here?

> +             trace_remote_iter_read_start(iter);

But still, the above grabs locks in the open, where it can return to user
space while still holding the locks? That's a no-no.

You can use the seq file start and stop for locking.

-- Steve

> +
> +     ((struct seq_file *)filp->private_data)->private = (void *)iter;
> +
> +     return 0;
> +}
> +
> +static int trace_release(struct inode *inode, struct file *filp)
> +{
> +     struct trace_remote_iterator *iter;
> +
> +     if (!(filp->f_mode & FMODE_READ))
> +             return 0;
> +
> +     iter = ((struct seq_file *)filp->private_data)->private;
> +     seq_release(inode, filp);
> +
> +     if (!iter)
> +             return 0;
> +
> +     guard(mutex)(&iter->remote->lock);
> +
> +     trace_remote_iter_read_finished(iter);
> +     trace_remote_iter_free(iter);
> +
> +     return 0;
> +}
> +
>  static ssize_t trace_write(struct file *filp, const char __user *ubuf, 
> size_t cnt, loff_t *ppos)
>  {
>       struct inode *inode = file_inode(filp);
>       struct trace_remote *remote = inode->i_private;
> -     int cpu = RING_BUFFER_ALL_CPUS;
> -
> -     if (inode->i_cdev)
> -             cpu = (long)inode->i_cdev - 1;
> +     int cpu = tracing_get_cpu(inode);
>  
>       guard(mutex)(&remote->lock);
>  
> @@ -447,7 +750,11 @@ static ssize_t trace_write(struct file *filp, const char 
> __user *ubuf, size_t cn
>  }
>  
>  static const struct file_operations trace_fops = {
> +     .open           = trace_open,
>       .write          = trace_write,
> +     .read           = seq_read,
> +     .read_iter      = seq_read_iter,
> +     .release        = trace_release,
>  };
>  
>  static int trace_remote_init_tracefs(const char *name, struct trace_remote 
> *remote)
> @@ -566,6 +873,7 @@ int trace_remote_register(const char *name, struct 
> trace_remote_callbacks *cbs,
>       remote->trace_buffer_size = 7 << 10;
>       remote->poll_ms = 100;
>       mutex_init(&remote->lock);
> +     init_rwsem(&remote->reader_lock);
>  
>       if (trace_remote_init_tracefs(name, remote)) {
>               kfree(remote);


Reply via email to