On Thu, 11 Jul 2024 13:02:39 +0200 Peter Zijlstra <pet...@infradead.org> wrote:
> With handle_swbp() triggering concurrently on (all) CPUs, tree_lock > becomes a bottleneck. Avoid treelock by doing RCU lookups of the > uprobe. > Looks good to me. Acked-by: Masami Hiramatsu (Google) <mhira...@kernel.org> Thanks, > Signed-off-by: Peter Zijlstra (Intel) <pet...@infradead.org> > --- > kernel/events/uprobes.c | 49 > +++++++++++++++++++++++++++++++++++++++--------- > 1 file changed, 40 insertions(+), 9 deletions(-) > > --- a/kernel/events/uprobes.c > +++ b/kernel/events/uprobes.c > @@ -40,6 +40,7 @@ static struct rb_root uprobes_tree = RB_ > #define no_uprobe_events() RB_EMPTY_ROOT(&uprobes_tree) > > static DEFINE_RWLOCK(uprobes_treelock); /* serialize rbtree access */ > +static seqcount_rwlock_t uprobes_seqcount = > SEQCNT_RWLOCK_ZERO(uprobes_seqcount, &uprobes_treelock); > > #define UPROBES_HASH_SZ 13 > /* serialize uprobe->pending_list */ > @@ -54,6 +55,7 @@ DEFINE_STATIC_PERCPU_RWSEM(dup_mmap_sem) > struct uprobe { > struct rb_node rb_node; /* node in the rb tree */ > refcount_t ref; > + struct rcu_head rcu; > struct rw_semaphore register_rwsem; > struct rw_semaphore consumer_rwsem; > struct list_head pending_list; > @@ -587,12 +589,25 @@ set_orig_insn(struct arch_uprobe *auprob > *(uprobe_opcode_t *)&auprobe->insn); > } > > +static struct uprobe *try_get_uprobe(struct uprobe *uprobe) > +{ > + if (refcount_inc_not_zero(&uprobe->ref)) > + return uprobe; > + return NULL; > +} > + > static struct uprobe *get_uprobe(struct uprobe *uprobe) > { > refcount_inc(&uprobe->ref); > return uprobe; > } > > +static void uprobe_free_rcu(struct rcu_head *rcu) > +{ > + struct uprobe *uprobe = container_of(rcu, struct uprobe, rcu); > + kfree(uprobe); > +} > + > static void put_uprobe(struct uprobe *uprobe) > { > if (refcount_dec_and_test(&uprobe->ref)) { > @@ -604,7 +619,7 @@ static void put_uprobe(struct uprobe *up > mutex_lock(&delayed_uprobe_lock); > delayed_uprobe_remove(uprobe, NULL); > mutex_unlock(&delayed_uprobe_lock); > - kfree(uprobe); > + call_rcu(&uprobe->rcu, uprobe_free_rcu); > } > } > > @@ -653,10 +668,10 @@ static struct uprobe *__find_uprobe(stru > .inode = inode, > .offset = offset, > }; > - struct rb_node *node = rb_find(&key, &uprobes_tree, __uprobe_cmp_key); > + struct rb_node *node = rb_find_rcu(&key, &uprobes_tree, > __uprobe_cmp_key); > > if (node) > - return get_uprobe(__node_2_uprobe(node)); > + return try_get_uprobe(__node_2_uprobe(node)); > > return NULL; > } > @@ -667,20 +682,32 @@ static struct uprobe *__find_uprobe(stru > */ > static struct uprobe *find_uprobe(struct inode *inode, loff_t offset) > { > - struct uprobe *uprobe; > + unsigned int seq; > > - read_lock(&uprobes_treelock); > - uprobe = __find_uprobe(inode, offset); > - read_unlock(&uprobes_treelock); > + guard(rcu)(); > > - return uprobe; > + do { > + seq = read_seqcount_begin(&uprobes_seqcount); > + struct uprobe *uprobe = __find_uprobe(inode, offset); > + if (uprobe) { > + /* > + * Lockless RB-tree lookups are prone to > false-negatives. > + * If they find something, it's good. If they do not > find, > + * it needs to be validated. > + */ > + return uprobe; > + } > + } while (read_seqcount_retry(&uprobes_seqcount, seq)); > + > + /* Really didn't find anything. */ > + return NULL; > } > > static struct uprobe *__insert_uprobe(struct uprobe *uprobe) > { > struct rb_node *node; > > - node = rb_find_add(&uprobe->rb_node, &uprobes_tree, __uprobe_cmp); > + node = rb_find_add_rcu(&uprobe->rb_node, &uprobes_tree, __uprobe_cmp); > if (node) > return get_uprobe(__node_2_uprobe(node)); > > @@ -702,7 +729,9 @@ static struct uprobe *insert_uprobe(stru > struct uprobe *u; > > write_lock(&uprobes_treelock); > + write_seqcount_begin(&uprobes_seqcount); > u = __insert_uprobe(uprobe); > + write_seqcount_end(&uprobes_seqcount); > write_unlock(&uprobes_treelock); > > return u; > @@ -936,7 +965,9 @@ static void delete_uprobe(struct uprobe > return; > > write_lock(&uprobes_treelock); > + write_seqcount_begin(&uprobes_seqcount); > rb_erase(&uprobe->rb_node, &uprobes_tree); > + write_seqcount_end(&uprobes_seqcount); > write_unlock(&uprobes_treelock); > RB_CLEAR_NODE(&uprobe->rb_node); /* for uprobe_is_active() */ > put_uprobe(uprobe); > > -- Masami Hiramatsu (Google) <mhira...@kernel.org>