From: Peter Zijlstra <pet...@infradead.org>

The kretprobe hash is mostly superfluous; replace it with a per-task
variable.

This gets rid of the task hash and its related locking.

The whole invalidate_rp_inst() is tedious and could go away once we
drop the rp-specific ri size.
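
To illustrate the idea outside the kernel, here is a minimal userspace
sketch (all names below are stand-ins, not the kernel's): each task owns
a LIFO of return-probe instances, only the owning task pushes onto it,
and the trampoline unlinks the whole list in one go, so no cross-task
hash or locking is needed. The real code uses llist and additionally has
to survive NMIs and remote tasks.

#include <stdio.h>
#include <stdlib.h>

struct ri {				/* stand-in for kretprobe_instance */
	struct ri *next;
	void *ret_addr;
};

struct task {				/* stand-in for task_struct */
	struct ri *kretprobe_instances;	/* LIFO head; single writer: the task */
};

/* Entry handler: push an instance on the current task's own list. */
static void push_instance(struct task *t, void *ret_addr)
{
	struct ri *ri = malloc(sizeof(*ri));

	ri->ret_addr = ret_addr;
	ri->next = t->kretprobe_instances;
	t->kretprobe_instances = ri;
}

/* Trampoline: unlink and run everything, newest (innermost call) first. */
static void flush_instances(struct task *t)
{
	struct ri *ri = t->kretprobe_instances;

	t->kretprobe_instances = NULL;
	while (ri) {
		struct ri *next = ri->next;

		printf("return to %p\n", ri->ret_addr);
		free(ri);
		ri = next;
	}
}

int main(void)
{
	struct task t = { NULL };

	push_instance(&t, (void *)0x1000);	/* outer call */
	push_instance(&t, (void *)0x2000);	/* nested call */
	flush_instances(&t);			/* pops 0x2000, then 0x1000 */
	return 0;
}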

Signed-off-by: Peter Zijlstra (Intel) <pet...@infradead.org>
---
  Changes:
    - [MH] ported on Masami's latest version
    - [MH] remove unneeded last node checking and unused variables
    - [MH] Fix to remove unneeded hlist_del from recycle_rp_inst()
---
 include/linux/kprobes.h |    1 +
 include/linux/sched.h   |    4 +
 kernel/fork.c           |    4 +
 kernel/kprobes.c        |  232 ++++++++++++++++++-----------------------------
 4 files changed, 100 insertions(+), 141 deletions(-)

diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 9c880c8a4e80..a30cccb07f21 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -157,6 +157,7 @@ struct kretprobe {
 
 struct kretprobe_instance {
        union {
+               struct llist_node llist;
                struct hlist_node hlist;
                struct rcu_head rcu;
        };
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 93ecd930efd3..0f2532f052a9 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1315,6 +1315,10 @@ struct task_struct {
        struct callback_head            mce_kill_me;
 #endif
 
+#ifdef CONFIG_KRETPROBES
+       struct llist_head               kretprobe_instances;
+#endif
+
        /*
         * New fields for task_struct should be added above here, so that
         * they are included in the randomized portion of task_struct.
diff --git a/kernel/fork.c b/kernel/fork.c
index 4d32190861bd..2ff5cceb0732 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2161,6 +2161,10 @@ static __latent_entropy struct task_struct *copy_process(
        INIT_LIST_HEAD(&p->thread_group);
        p->task_works = NULL;
 
+#ifdef CONFIG_KRETPROBES
+       p->kretprobe_instances.first = NULL;
+#endif
+
        /*
         * Ensure that the cgroup subsystem policies allow the new process to be
         * forked. It should be noted the the new process's css_set can be changed
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index d0b4b7e89fa6..5904ce656ab7 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -53,7 +53,6 @@ static int kprobes_initialized;
  * - RCU hlist traversal under disabling preempt (breakpoint handlers)
  */
 static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE];
-static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE];
 
 /* NOTE: change this value only with kprobe_mutex held */
 static bool kprobes_all_disarmed;
@@ -61,9 +60,6 @@ static bool kprobes_all_disarmed;
 /* This protects kprobe_table and optimizing_list */
 static DEFINE_MUTEX(kprobe_mutex);
 static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL;
-static struct {
-       raw_spinlock_t lock ____cacheline_aligned_in_smp;
-} kretprobe_table_locks[KPROBE_TABLE_SIZE];
 
 kprobe_opcode_t * __weak kprobe_lookup_name(const char *name,
                                        unsigned int __unused)
@@ -71,11 +67,6 @@ kprobe_opcode_t * __weak kprobe_lookup_name(const char *name,
        return ((kprobe_opcode_t *)(kallsyms_lookup_name(name)));
 }
 
-static raw_spinlock_t *kretprobe_table_lock_ptr(unsigned long hash)
-{
-       return &(kretprobe_table_locks[hash].lock);
-}
-
 /* Blacklist -- list of struct kprobe_blacklist_entry */
 static LIST_HEAD(kprobe_blacklist);
 
@@ -1227,8 +1218,6 @@ static void recycle_rp_inst(struct kretprobe_instance *ri)
 {
        struct kretprobe *rp = ri->rp;
 
-       /* remove rp inst off the rprobe_inst_table */
-       hlist_del(&ri->hlist);
        INIT_HLIST_NODE(&ri->hlist);
        if (likely(rp)) {
                raw_spin_lock(&rp->lock);
@@ -1239,49 +1228,6 @@ static void recycle_rp_inst(struct kretprobe_instance *ri)
 }
 NOKPROBE_SYMBOL(recycle_rp_inst);
 
-static void kretprobe_hash_lock(struct task_struct *tsk,
-                        struct hlist_head **head, unsigned long *flags)
-__acquires(hlist_lock)
-{
-       unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS);
-       raw_spinlock_t *hlist_lock;
-
-       *head = &kretprobe_inst_table[hash];
-       hlist_lock = kretprobe_table_lock_ptr(hash);
-       raw_spin_lock_irqsave(hlist_lock, *flags);
-}
-NOKPROBE_SYMBOL(kretprobe_hash_lock);
-
-static void kretprobe_table_lock(unsigned long hash,
-                                unsigned long *flags)
-__acquires(hlist_lock)
-{
-       raw_spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
-       raw_spin_lock_irqsave(hlist_lock, *flags);
-}
-NOKPROBE_SYMBOL(kretprobe_table_lock);
-
-static void kretprobe_hash_unlock(struct task_struct *tsk,
-                          unsigned long *flags)
-__releases(hlist_lock)
-{
-       unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS);
-       raw_spinlock_t *hlist_lock;
-
-       hlist_lock = kretprobe_table_lock_ptr(hash);
-       raw_spin_unlock_irqrestore(hlist_lock, *flags);
-}
-NOKPROBE_SYMBOL(kretprobe_hash_unlock);
-
-static void kretprobe_table_unlock(unsigned long hash,
-                                  unsigned long *flags)
-__releases(hlist_lock)
-{
-       raw_spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
-       raw_spin_unlock_irqrestore(hlist_lock, *flags);
-}
-NOKPROBE_SYMBOL(kretprobe_table_unlock);
-
 static struct kprobe kprobe_busy = {
        .addr = (void *) get_kprobe,
 };
@@ -1311,24 +1257,23 @@ void kprobe_busy_end(void)
 void kprobe_flush_task(struct task_struct *tk)
 {
        struct kretprobe_instance *ri;
-       struct hlist_head *head;
-       struct hlist_node *tmp;
-       unsigned long hash, flags = 0;
+       struct llist_node *node;
 
+       /* Early boot, not yet initialized. */
        if (unlikely(!kprobes_initialized))
-               /* Early boot.  kretprobe_table_locks not yet initialized. */
                return;
 
        kprobe_busy_begin();
 
-       hash = hash_ptr(tk, KPROBE_HASH_BITS);
-       head = &kretprobe_inst_table[hash];
-       kretprobe_table_lock(hash, &flags);
-       hlist_for_each_entry_safe(ri, tmp, head, hlist) {
-               if (ri->task == tk)
-                       recycle_rp_inst(ri);
+       node = tk->kretprobe_instances.first;
+       tk->kretprobe_instances.first = NULL;
+
+       while (node) {
+               ri = container_of(node, struct kretprobe_instance, llist);
+               node = node->next;
+
+               recycle_rp_inst(ri);
        }
-       kretprobe_table_unlock(hash, &flags);
 
        kprobe_busy_end();
 }
@@ -1345,24 +1290,70 @@ static inline void free_rp_inst(struct kretprobe *rp)
        }
 }
 
-static void cleanup_rp_inst(struct kretprobe *rp)
+/* XXX all of this only exists because we have rp specific ri's */
+
+static bool __invalidate_rp_inst(struct task_struct *t, void *rp)
 {
-       unsigned long flags, hash;
+       struct llist_node *node = t->kretprobe_instances.first;
        struct kretprobe_instance *ri;
-       struct hlist_node *next;
-       struct hlist_head *head;
+
+       while (node) {
+               ri = container_of(node, struct kretprobe_instance, llist);
+               node = node->next;
+
+               if (ri->rp == rp)
+                       ri->rp = NULL;
+       }
+
+       return true;
+}
+
+struct invl_rp_ipi {
+       struct task_struct *task;
+       void *rp;
+       bool done;
+};
+
+static void __invalidate_rp_ipi(void *arg)
+{
+       struct invl_rp_ipi *iri = arg;
+
+       if (iri->task == current)
+               iri->done = __invalidate_rp_inst(iri->task, iri->rp);
+}
+
+static void invalidate_rp_inst(struct task_struct *t, struct kretprobe *rp)
+{
+       struct invl_rp_ipi iri = {
+               .task = t,
+               .rp = rp,
+               .done = false
+       };
+
+       for (;;) {
+               if (try_invoke_on_locked_down_task(t, __invalidate_rp_inst, rp))
+                       return;
+
+               smp_call_function_single(task_cpu(t), __invalidate_rp_ipi, &iri, 1);
+               if (iri.done)
+                       return;
+       }
+}
+
+static void cleanup_rp_inst(struct kretprobe *rp)
+{
+       struct task_struct *p, *t;
 
        /* To avoid recursive kretprobe by NMI, set kprobe busy here */
        kprobe_busy_begin();
-       for (hash = 0; hash < KPROBE_TABLE_SIZE; hash++) {
-               kretprobe_table_lock(hash, &flags);
-               head = &kretprobe_inst_table[hash];
-               hlist_for_each_entry_safe(ri, next, head, hlist) {
-                       if (ri->rp == rp)
-                               ri->rp = NULL;
-               }
-               kretprobe_table_unlock(hash, &flags);
+       rcu_read_lock();
+       for_each_process_thread(p, t) {
+               if (!t->kretprobe_instances.first)
+                       continue;
+
+               invalidate_rp_inst(t, rp);
        }
+       rcu_read_unlock();
        kprobe_busy_end();
 
        free_rp_inst(rp);
@@ -1928,70 +1919,39 @@ unsigned long __kretprobe_trampoline_handler(struct pt_regs *regs,
                                             void *trampoline_address,
                                             void *frame_pointer)
 {
-       struct kretprobe_instance *ri = NULL, *last = NULL;
-       struct hlist_head *head;
-       struct hlist_node *tmp;
-       unsigned long flags;
        kprobe_opcode_t *correct_ret_addr = NULL;
-       bool skipped = false;
+       struct kretprobe_instance *ri = NULL;
+       struct llist_node *first, *node;
 
-       kretprobe_hash_lock(current, &head, &flags);
+       first = node = current->kretprobe_instances.first;
+       while (node) {
+               ri = container_of(node, struct kretprobe_instance, llist);
 
-       /*
-        * It is possible to have multiple instances associated with a given
-        * task either because multiple functions in the call path have
-        * return probes installed on them, and/or more than one
-        * return probe was registered for a target function.
-        *
-        * We can handle this because:
-        *     - instances are always pushed into the head of the list
-        *     - when multiple return probes are registered for the same
-        *       function, the (chronologically) first instance's ret_addr
-        *       will be the real return address, and all the rest will
-        *       point to kretprobe_trampoline.
-        */
-       hlist_for_each_entry(ri, head, hlist) {
-               if (ri->task != current)
-                       /* another task is sharing our hash bucket */
-                       continue;
-               /*
-                * Return probes must be pushed on this hash list correct
-                * order (same as return order) so that it can be popped
-                * correctly. However, if we find it is pushed it incorrect
-                * order, this means we find a function which should not be
-                * probed, because the wrong order entry is pushed on the
-                * path of processing other kretprobe itself.
-                */
-               if (ri->fp != frame_pointer) {
-                       if (!skipped)
-                               pr_warn("kretprobe is stacked incorrectly. Trying to fixup.\n");
-                       skipped = true;
-                       continue;
-               }
+               BUG_ON(ri->fp != frame_pointer);
 
                correct_ret_addr = ri->ret_addr;
-               if (skipped)
-                       pr_warn("%ps must be blacklisted because of incorrect kretprobe order\n",
-                               ri->rp->kp.addr);
-
-               if (correct_ret_addr != trampoline_address)
+               if (correct_ret_addr != trampoline_address) {
                        /*
                         * This is the real return address. Any other
                         * instances associated with this task are for
                         * other calls deeper on the call stack
                         */
                        break;
+               }
+
+               node = node->next;
        }
 
        BUG_ON(!correct_ret_addr || (correct_ret_addr == trampoline_address));
-       last = ri;
 
-       hlist_for_each_entry_safe(ri, tmp, head, hlist) {
-               if (ri->task != current)
-                       /* another task is sharing our hash bucket */
-                       continue;
-               if (ri->fp != frame_pointer)
-                       continue;
+       /* Unlink all nodes for this frame. */
+       current->kretprobe_instances.first = node->next;
+       node->next = NULL;
+
+       /* Run them..  */
+       while (first) {
+               ri = container_of(first, struct kretprobe_instance, llist);
+               node = first->next;
 
                if (ri->rp && ri->rp->handler) {
                        __this_cpu_write(current_kprobe, &ri->rp->kp);
@@ -2002,12 +1962,9 @@ unsigned long __kretprobe_trampoline_handler(struct pt_regs *regs,
 
                recycle_rp_inst(ri);
 
-               if (ri == last)
-                       break;
+               first = node;
        }
 
-       kretprobe_hash_unlock(current, &flags);
-
        return (unsigned long)correct_ret_addr;
 }
 NOKPROBE_SYMBOL(__kretprobe_trampoline_handler)
@@ -2019,11 +1976,10 @@ NOKPROBE_SYMBOL(__kretprobe_trampoline_handler)
 static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
 {
        struct kretprobe *rp = container_of(p, struct kretprobe, kp);
-       unsigned long hash, flags = 0;
+       unsigned long flags = 0;
        struct kretprobe_instance *ri;
 
        /* TODO: consider to only swap the RA after the last pre_handler fired */
-       hash = hash_ptr(current, KPROBE_HASH_BITS);
        raw_spin_lock_irqsave(&rp->lock, flags);
        if (!hlist_empty(&rp->free_instances)) {
                ri = hlist_entry(rp->free_instances.first,
@@ -2043,11 +1999,8 @@ static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
 
                arch_prepare_kretprobe(ri, regs);
 
-               /* XXX(hch): why is there no hlist_move_head? */
-               INIT_HLIST_NODE(&ri->hlist);
-               kretprobe_table_lock(hash, &flags);
-               hlist_add_head(&ri->hlist, &kretprobe_inst_table[hash]);
-               kretprobe_table_unlock(hash, &flags);
+               __llist_add(&ri->llist, &current->kretprobe_instances);
+
        } else {
                rp->nmissed++;
                raw_spin_unlock_irqrestore(&rp->lock, flags);
@@ -2532,11 +2485,8 @@ static int __init init_kprobes(void)
 
        /* FIXME allocate the probe table, currently defined statically */
        /* initialize all list heads */
-       for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
+       for (i = 0; i < KPROBE_TABLE_SIZE; i++)
                INIT_HLIST_HEAD(&kprobe_table[i]);
-               INIT_HLIST_HEAD(&kretprobe_inst_table[i]);
-               raw_spin_lock_init(&(kretprobe_table_locks[i].lock));
-       }
 
        err = populate_kprobe_blacklist(__start_kprobe_blacklist,
                                        __stop_kprobe_blacklist);
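
None of this changes the user-facing kretprobe API; existing users are
unaffected by the switch to the per-task list. For reference, a minimal
consumer in the style of samples/kprobes/kretprobe_example.c (the probed
symbol below is only an example; pick any kprobe-able function):

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/kprobes.h>

/* Runs on return; the instance bookkeeping behind it is now per-task. */
static int ret_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
{
	pr_info("%s returned %ld\n", ri->rp->kp.symbol_name,
		(long)regs_return_value(regs));
	return 0;
}

static struct kretprobe my_kretprobe = {
	.handler	= ret_handler,
	.kp.symbol_name	= "kernel_clone",	/* example target */
	.maxactive	= 20,	/* pre-allocated instances, as before */
};

static int __init example_init(void)
{
	return register_kretprobe(&my_kretprobe);
}

static void __exit example_exit(void)
{
	unregister_kretprobe(&my_kretprobe);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");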
