On Thu, Dec 03, 2015 at 12:32:39PM +0200, Alexander Shishkin wrote:
> +++ b/kernel/events/core.c
> @@ -4630,11 +4630,62 @@ static void perf_mmap_close(struct vm_area_struct 
> *vma)
>        */
>       if (rb_has_aux(rb) && vma->vm_pgoff == rb->aux_pgoff &&
>           atomic_dec_and_mutex_lock(&rb->aux_mmap_count, &event->mmap_mutex)) 
> {
> +             struct perf_event *iter;
> +             LIST_HEAD(stop_list);
> +             unsigned long flags;
> +
> +             /*
> +              * Stop all aux events that are writing to this here buffer,
> +              * so that we can free its aux pages and corresponding pmu
> +              * data. Note that after rb::aux_mmap_count dropped to zero,
> +              * they won't start any more (see perf_aux_output_begin()).
> +              *
> +              * Since we can't take ctx::mutex under rb::event_lock, we
> +              * need to jump through hoops to get there, namely fish out
> +              * all events from rb::event_list onto an on-stack list,
> +              * carry out the stopping and splice this on-stack list back
> +              * to rb::event_list.
> +              * This means that these events will miss wakeups during this
> +              * window, but since it's mmap_close, assume the consumer
> +              * doesn't care any more.
> +              *
> +              * Note: list_splice_init_rcu() doesn't cut it, since it syncs
> +              * and rb::event_lock is a spinlock.
> +              */
> +retry:
> +             spin_lock_irqsave(&rb->event_lock, flags);
> +             list_for_each_entry_rcu(iter, &rb->event_list, rb_entry) {
> +                     list_del_rcu(&iter->rb_entry);
> +                     spin_unlock_irqrestore(&rb->event_lock, flags);
> +
> +                     synchronize_rcu();
> +                     list_add_tail(&iter->rb_entry, &stop_list);
> +
> +                     goto retry;
> +             }
> +             spin_unlock_irqrestore(&rb->event_lock, flags);
> +
> +             mutex_unlock(&event->mmap_mutex);
> +
> +             list_for_each_entry(iter, &stop_list, rb_entry) {
> +                     if (!has_aux(iter))
> +                             continue;
> +
> +                     perf_event_stop(iter);
> +             }
> +
> +             /* and splice it back now that we're done with them */
> +             spin_lock_irqsave(&rb->event_lock, flags);
> +             list_splice_tail(&stop_list, &rb->event_list);
> +             spin_unlock_irqrestore(&rb->event_lock, flags);
> +
> +             /* now it's safe to free the pages */
>               atomic_long_sub(rb->aux_nr_pages, &mmap_user->locked_vm);
>               vma->vm_mm->pinned_vm -= rb->aux_mmap_locked;
>  
> +             /* this has to be the last one */
>               rb_free_aux(rb);
> -             mutex_unlock(&event->mmap_mutex);
> +             WARN_ON_ONCE(atomic_read(&rb->aux_refcount));
>       }

Yuck, nasty problem. Also, I think it's broken. By not having
mmap_mutex around the whole thing, notably rb_free_aux(), you can race
against mmap().

What seems possible now is that:

        mmap(aux); // rb->aux_mmap_count == 1
        munmap(aux)
          atomic_dec_and_mutex_lock(&rb->aux_mmap_count, &event->mmap_mutex); 
// == 0

          mutex_unlock(&event->mmap_mutex);

                                        mmap(aux)
                                          if (rb_has_aux())
                                            atomic_inc(&rb->aux_mmap_count); // 
== 1

          rb_free_aux(); // oops!!




So I thought that pulling all the aux bits out from the ring_buffer
struct, such that we have rb->aux, would solve the issue in that we can
then fix mmap() to have the same retry loop as for event->rb.

And while that fixes that race (I almost had that patch complete -- I
might still send it out, just so you can see what it looks like), it
doesn't solve the complete problem I don't think.

Because in that case, you want the event to start again on the new
buffer, and I think it's possible we end up calling ->start() before
we've issued the ->stop() and that would be BAD (tm).

The only solution I've come up with is:

        struct rb_aux *aux = rb->aux;

        if (aux && vma->vm_pgoff == aux->pgoff) {
                ctx = perf_event_ctx_lock(event);
                if (!atomic_dec_and_mutex_lock(&aux->mmap_count, 
&event->mmap_mutex)) {
                        /* we now hold both ctx::mutex and event::mmap_mutex */
                        rb->aux = NULL;
                        ring_buffer_put(rb); /* aux had a reference */
                        _perf_event_stop(event);
                        ring_buffer_put_aux(aux); /* should be last */
                        mutex_unlock(&event->mmap_mutex);
                }
                mutex_unlock(&ctx->mutex);
        }


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to