Maintain a per-user cpu-indexed array of shmemfs-backed events, same
way as mlock accounting.

Signed-off-by: Alexander Shishkin <alexander.shish...@linux.intel.com>
---
 include/linux/sched/user.h  |  6 ++++
 kernel/events/core.c        | 14 ++++-----
 kernel/events/ring_buffer.c | 69 +++++++++++++++++++++++++++++++++++++--------
 kernel/user.c               |  1 +
 4 files changed, 71 insertions(+), 19 deletions(-)

diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h
index 5d5415e129..bf10f95250 100644
--- a/include/linux/sched/user.h
+++ b/include/linux/sched/user.h
@@ -5,6 +5,7 @@
 #include <linux/atomic.h>
 
 struct key;
+struct perf_event;
 
 /*
  * Some day this will be a full-fledged user tracking system..
@@ -39,6 +40,11 @@ struct user_struct {
 #if defined(CONFIG_PERF_EVENTS) || defined(CONFIG_BPF_SYSCALL)
        atomic_long_t locked_vm;
 #endif
+#ifdef CONFIG_PERF_EVENTS
+       atomic_long_t nr_pinnable_events;
+       struct mutex pinned_mutex;
+       struct perf_event ** __percpu pinned_events;
+#endif
 };
 
 extern int uids_sysfs_init(void);
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 1fed69d4ba..e00f1f6aaf 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -384,7 +384,6 @@ static atomic_t perf_sched_count;
 static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
 static DEFINE_PER_CPU(int, perf_sched_cb_usages);
 static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events);
-static DEFINE_PER_CPU(struct perf_event *, shmem_events);
 
 static atomic_t nr_mmap_events __read_mostly;
 static atomic_t nr_comm_events __read_mostly;
@@ -2086,7 +2085,8 @@ enum pin_event_t {
 
 static enum pin_event_t pin_event_pages(struct perf_event *event)
 {
-       struct perf_event **pinned_event = this_cpu_ptr(&shmem_events);
+       struct user_struct *user = event->rb->mmap_user;
+       struct perf_event **pinned_event = this_cpu_ptr(user->pinned_events);
        struct perf_event *old_event = *pinned_event;
 
        if (old_event == event)
@@ -4281,13 +4281,14 @@ static void _free_event(struct perf_event *event)
        unaccount_event(event);
 
        if (event->attach_state & PERF_ATTACH_SHMEM) {
+               struct user_struct *user = event->rb->mmap_user;
                struct perf_event_context *ctx = event->ctx;
                int cpu;
 
                atomic_set(&event->xpinned, 0);
                for_each_possible_cpu(cpu) {
                        struct perf_event **pinned_event =
-                               per_cpu_ptr(&shmem_events, cpu);
+                               per_cpu_ptr(user->pinned_events, cpu);
 
                        cmpxchg(pinned_event, event, NULL);
                }
@@ -9530,7 +9531,7 @@ perf_event_detach(struct perf_event *event, struct perf_event *parent_event,
 {
        struct ring_buffer *parent_rb = parent_event ? parent_event->rb : NULL;
        char *filename;
-       int err;
+       int err = -ENOMEM;
 
        filename = kasprintf(GFP_KERNEL, "%s:%x.event",
                             task ? "task" : "cpu",
@@ -9550,10 +9551,9 @@ perf_event_detach(struct perf_event *event, struct perf_event *parent_event,
        if (err) {
                tracefs_remove(event->dent);
                event->dent = NULL;
-               return err;
        }
 
-       return 0;
+       return err;
 }
 /*
  * Allocate and initialize a event structure
@@ -10290,7 +10290,7 @@ SYSCALL_DEFINE5(perf_event_open,
        }
 
        if (detached) {
-               err = perf_event_detach(event, task, NULL);
+               err = perf_event_detach(event, NULL, task, NULL);
                if (err)
                        goto err_context;
 
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 896d441642..8d37e4e591 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -563,6 +563,44 @@ void *perf_get_aux(struct perf_output_handle *handle)
        return handle->rb->aux_priv;
 }
 
+static struct user_struct *get_users_pinned_events(void)
+{
+       struct user_struct *user = current_user(), *ret = NULL;
+
+       if (atomic_long_inc_not_zero(&user->nr_pinnable_events))
+               return user;
+
+       mutex_lock(&user->pinned_mutex);
+       if (!atomic_long_read(&user->nr_pinnable_events)) {
+               if (WARN_ON_ONCE(!!user->pinned_events))
+                       goto unlock;
+
+               user->pinned_events = alloc_percpu(struct perf_event *);
+               if (!user->pinned_events) {
+                       goto unlock;
+               } else {
+                       atomic_long_inc(&user->nr_pinnable_events);
+                       ret = get_current_user();
+               }
+       }
+
+unlock:
+       mutex_unlock(&user->pinned_mutex);
+
+       return ret;
+}
+
+static void put_users_pinned_events(struct user_struct *user)
+{
+       if (!atomic_long_dec_and_test(&user->nr_pinnable_events))
+               return;
+
+       mutex_lock(&user->pinned_mutex);
+       free_percpu(user->pinned_events);
+       user->pinned_events = NULL;
+       mutex_unlock(&user->pinned_mutex);
+}
+
 /*
  * Check if the current user can afford @nr_pages, considering the
  * perf_event_mlock sysctl and their mlock limit. If the former is exceeded,
@@ -574,11 +612,14 @@ static int __ring_buffer_account(struct ring_buffer *rb, struct mm_struct *mm,
                                  unsigned long nr_pages, unsigned long *locked)
 {
        unsigned long total, limit, pinned;
+       struct user_struct *user;
 
        if (!mm)
                mm = rb->mmap_mapping;
 
-       rb->mmap_user = current_user();
+       user = get_users_pinned_events();
+       if (!user)
+               return -ENOMEM;
 
        limit = sysctl_perf_event_mlock >> (PAGE_SHIFT - 10);
 
@@ -587,10 +628,7 @@ static int __ring_buffer_account(struct ring_buffer *rb, struct mm_struct *mm,
         */
        limit *= num_online_cpus();
 
-       total = atomic_long_read(&rb->mmap_user->locked_vm) + nr_pages;
-
-       free_uid(rb->mmap_user);
-       rb->mmap_user = NULL;
+       total = atomic_long_read(&user->locked_vm) + nr_pages;
 
        pinned = 0;
        if (total > limit) {
@@ -599,7 +637,7 @@ static int __ring_buffer_account(struct ring_buffer *rb, struct mm_struct *mm,
                 * limit needs to be accounted to the consumer's mm.
                 */
                if (!mm)
-                       return -EPERM;
+                       goto err_put_user;
 
                pinned = total - limit;
 
@@ -608,9 +646,8 @@ static int __ring_buffer_account(struct ring_buffer *rb, struct mm_struct *mm,
                total = mm->pinned_vm + pinned;
 
                if ((total > limit) && perf_paranoid_tracepoint_raw() &&
-                   !capable(CAP_IPC_LOCK)) {
-                       return -EPERM;
-               }
+                   !capable(CAP_IPC_LOCK))
+                       goto err_put_user;
 
                *locked = pinned;
                mm->pinned_vm += pinned;
@@ -619,10 +656,15 @@ static int __ring_buffer_account(struct ring_buffer *rb, struct mm_struct *mm,
        if (!rb->mmap_mapping)
                rb->mmap_mapping = mm;
 
-       rb->mmap_user = get_current_user();
-       atomic_long_add(nr_pages, &rb->mmap_user->locked_vm);
+       rb->mmap_user = user;
+       atomic_long_add(nr_pages, &user->locked_vm);
 
        return 0;
+
+err_put_user:
+       put_users_pinned_events(user);
+
+       return -EPERM;
 }
 
 static int ring_buffer_account(struct ring_buffer *rb, struct mm_struct *mm,
@@ -657,7 +699,7 @@ void ring_buffer_unaccount(struct ring_buffer *rb, bool aux)
        if (rb->mmap_mapping)
                rb->mmap_mapping->pinned_vm -= pinned;
 
-       free_uid(rb->mmap_user);
+       put_users_pinned_events(rb->mmap_user);
 }
 
 #define PERF_AUX_GFP   (GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY)
@@ -1124,6 +1166,7 @@ rb_shmem_account(struct ring_buffer *rb, struct ring_buffer *parent_rb)
 
                rb->acct_refcount = parent_rb->acct_refcount;
                atomic_inc(rb->acct_refcount);
+               rb->mmap_user = get_uid(parent_rb->mmap_user);
 
                return 0;
        }
@@ -1146,6 +1189,8 @@ rb_shmem_account(struct ring_buffer *rb, struct ring_buffer *parent_rb)
 
 static void rb_shmem_unaccount(struct ring_buffer *rb)
 {
+       free_uid(rb->mmap_user);
+
        if (!atomic_dec_and_test(rb->acct_refcount)) {
                rb->acct_refcount = NULL;
                return;
diff --git a/kernel/user.c b/kernel/user.c
index 00281add65..e95a82d31d 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -185,6 +185,7 @@ struct user_struct *alloc_uid(kuid_t uid)
 
                new->uid = uid;
                atomic_set(&new->__count, 1);
+               mutex_init(&new->pinned_mutex);
 
                /*
                 * Before adding this, check whether we raced
-- 
2.14.1

Reply via email to