From: Peter Zijlstra <pet...@infradead.org>

commit 1cf8dfe8a661f0462925df943140e9f6d1ea5233 upstream.

Syzcaller reported the following Use-after-Free bug:

        close()                                         clone()

                                                          copy_process()
                                                            
perf_event_init_task()
                                                              
perf_event_init_context()
                                                                
mutex_lock(parent_ctx->mutex)
                                                                
inherit_task_group()
                                                                  
inherit_group()
                                                                    
inherit_event()
                                                                      
mutex_lock(event->child_mutex)
                                                                      // expose 
event on child list
                                                                      
list_add_tail()
                                                                      
mutex_unlock(event->child_mutex)
                                                                
mutex_unlock(parent_ctx->mutex)

                                                            ...
                                                            goto bad_fork_*

                                                          bad_fork_cleanup_perf:
                                                            
perf_event_free_task()

          perf_release()
            perf_event_release_kernel()
              list_for_each_entry()
                mutex_lock(ctx->mutex)
                mutex_lock(event->child_mutex)
                // event is from the failing inherit
                // on the other CPU
                perf_remove_from_context()
                list_move()
                mutex_unlock(event->child_mutex)
                mutex_unlock(ctx->mutex)

                                                              
mutex_lock(ctx->mutex)
                                                              
list_for_each_entry_safe()
                                                                // event 
already stolen
                                                              
mutex_unlock(ctx->mutex)

                                                            delayed_free_task()
                                                              free_task()

             list_for_each_entry_safe()
               list_del()
               free_event()
                 _free_event()
                   // and so event->hw.target
                   // is the already freed failed clone()
                   if (event->hw.target)
                     put_task_struct(event->hw.target)
                       // WHOOPSIE, already quite dead

Which puts the lie to the the comment on perf_event_free_task():
'unexposed, unused context' not so much.

Which is a 'fun' confluence of fail; copy_process() doing an
unconditional free_task() and not respecting refcounts, and perf having
creative locking. In particular:

  82d94856fa22 ("perf/core: Fix lock inversion between perf,trace,cpuhp")

seems to have overlooked this 'fun' parade.

Solve it by using the fact that detached events still have a reference
count on their (previous) context. With this perf_event_free_task()
can detect when events have escaped and wait for their destruction.

Debugged-by: Alexander Shishkin <alexander.shish...@linux.intel.com>
Reported-by: syzbot+a24c397a29ad22d86...@syzkaller.appspotmail.com
Signed-off-by: Peter Zijlstra (Intel) <pet...@infradead.org>
Acked-by: Mark Rutland <mark.rutl...@arm.com>
Cc: <sta...@vger.kernel.org>
Cc: Alexander Shishkin <alexander.shish...@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <a...@redhat.com>
Cc: Jiri Olsa <jo...@redhat.com>
Cc: Linus Torvalds <torva...@linux-foundation.org>
Cc: Peter Zijlstra <pet...@infradead.org>
Cc: Stephane Eranian <eran...@google.com>
Cc: Thomas Gleixner <t...@linutronix.de>
Cc: Vince Weaver <vincent.wea...@maine.edu>
Fixes: 82d94856fa22 ("perf/core: Fix lock inversion between perf,trace,cpuhp")
Signed-off-by: Ingo Molnar <mi...@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gre...@linuxfoundation.org>

---
 kernel/events/core.c |   49 +++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 41 insertions(+), 8 deletions(-)

--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4459,12 +4459,20 @@ static void _free_event(struct perf_even
        if (event->destroy)
                event->destroy(event);
 
-       if (event->ctx)
-               put_ctx(event->ctx);
-
+       /*
+        * Must be after ->destroy(), due to uprobe_perf_close() using
+        * hw.target.
+        */
        if (event->hw.target)
                put_task_struct(event->hw.target);
 
+       /*
+        * perf_event_free_task() relies on put_ctx() being 'last', in 
particular
+        * all task references must be cleaned up.
+        */
+       if (event->ctx)
+               put_ctx(event->ctx);
+
        exclusive_event_destroy(event);
        module_put(event->pmu->module);
 
@@ -4644,8 +4652,17 @@ again:
        mutex_unlock(&event->child_mutex);
 
        list_for_each_entry_safe(child, tmp, &free_list, child_list) {
+               void *var = &child->ctx->refcount;
+
                list_del(&child->child_list);
                free_event(child);
+
+               /*
+                * Wake any perf_event_free_task() waiting for this event to be
+                * freed.
+                */
+               smp_mb(); /* pairs with wait_var_event() */
+               wake_up_var(var);
        }
 
 no_ctx:
@@ -11506,11 +11523,11 @@ static void perf_free_event(struct perf_
 }
 
 /*
- * Free an unexposed, unused context as created by inheritance by
- * perf_event_init_task below, used by fork() in case of fail.
+ * Free a context as created by inheritance by perf_event_init_task() below,
+ * used by fork() in case of fail.
  *
- * Not all locks are strictly required, but take them anyway to be nice and
- * help out with the lockdep assertions.
+ * Even though the task has never lived, the context and events have been
+ * exposed through the child_list, so we must take care tearing it all down.
  */
 void perf_event_free_task(struct task_struct *task)
 {
@@ -11540,7 +11557,23 @@ void perf_event_free_task(struct task_st
                        perf_free_event(event, ctx);
 
                mutex_unlock(&ctx->mutex);
-               put_ctx(ctx);
+
+               /*
+                * perf_event_release_kernel() could've stolen some of our
+                * child events and still have them on its free_list. In that
+                * case we must wait for these events to have been freed (in
+                * particular all their references to this task must've been
+                * dropped).
+                *
+                * Without this copy_process() will unconditionally free this
+                * task (irrespective of its reference count) and
+                * _free_event()'s put_task_struct(event->hw.target) will be a
+                * use-after-free.
+                *
+                * Wait for all events to drop their context reference.
+                */
+               wait_var_event(&ctx->refcount, refcount_read(&ctx->refcount) == 
1);
+               put_ctx(ctx); /* must be last */
        }
 }
 


Reply via email to