The commit is pushed to "branch-rh7-3.10.0-693.1.1.vz7.37.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git after rh7-3.10.0-693.1.1.vz7.37.4 ------> commit d514c18772100d3f459f383ee1dca501118bc5cc Author: Peter Zijlstra <pet...@infradead.org> Date: Fri Sep 15 17:27:37 2017 +0300
module: Optimize __module_address() using a latched RB-tree Currently __module_address() is using a linear search through all modules in order to find the module corresponding to the provided address. With a lot of modules this can take a lot of time. One of the users of this is kernel_text_address() which is employed in many stack unwinders; which in turn are used by perf-callchain and ftrace (possibly from NMI context). So by optimizing __module_address() we optimize many stack unwinders which are used by both perf and tracing in performance sensitive code. Cc: Rusty Russell <ru...@rustcorp.com.au> Cc: Steven Rostedt <rost...@goodmis.org> Cc: Mathieu Desnoyers <mathieu.desnoy...@efficios.com> Cc: Oleg Nesterov <o...@redhat.com> Cc: "Paul E. McKenney" <paul...@linux.vnet.ibm.com> Signed-off-by: Peter Zijlstra (Intel) <pet...@infradead.org> Signed-off-by: Rusty Russell <ru...@rustcorp.com.au> https://jira.sw.ru/browse/PSBM-69081 (cherry picked from commit 93c2e105f6bcee231c951ba0e56e84505c4b0483) Signed-off-by: Andrey Ryabinin <aryabi...@virtuozzo.com> --- include/linux/module.h | 32 +++++++++++--- kernel/module.c | 117 ++++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 138 insertions(+), 11 deletions(-) diff --git a/include/linux/module.h b/include/linux/module.h index a4155ca..48c7335 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -17,6 +17,7 @@ #include <linux/moduleparam.h> #include <linux/tracepoint.h> #include <linux/export.h> +#include <linux/rbtree_latch.h> #include <linux/percpu.h> #include <asm/module.h> @@ -236,8 +237,14 @@ struct module_ext { #endif }; -struct module -{ +struct module; + +struct mod_tree_node { + struct module *mod; + struct latch_tree_node node; +}; + +struct module { enum module_state state; /* Member of list of modules */ @@ -296,8 +303,15 @@ struct module /* Startup function. 
*/ int (*init)(void); - /* If this is non-NULL, vfree after init() returns */ - void *module_init; + /* + * If this is non-NULL, vfree() after init() returns. + * + * Cacheline align here, such that: + * module_init, module_core, init_size, core_size, + * init_text_size, core_text_size and mtn_core::{mod,node[0]} + * are on the same cacheline. + */ + void *module_init ____cacheline_aligned; /* Here is the actual code + data, vfree'd on unload. */ void *module_core; @@ -308,6 +322,14 @@ struct module /* The size of the executable code in each section. */ unsigned int init_text_size, core_text_size; + /* + * We want mtn_core::{mod,node[0]} to be in the same cacheline as the + * above entries such that a regular lookup will only touch one + * cacheline. + */ + struct mod_tree_node mtn_core; + struct mod_tree_node mtn_init; + /* Size of RO sections of the module (text+rodata) */ unsigned int init_ro_size, core_ro_size; @@ -392,7 +414,7 @@ struct module ctor_fn_t *ctors; unsigned int num_ctors; #endif -}; +} ____cacheline_aligned; #ifndef MODULE_ARCH_INIT #define MODULE_ARCH_INIT {} #endif diff --git a/kernel/module.c b/kernel/module.c index 3f5edae..952a958 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -105,6 +105,108 @@ DEFINE_MUTEX(module_mutex); EXPORT_SYMBOL_GPL(module_mutex); static LIST_HEAD(modules); + +/* + * Use a latched RB-tree for __module_address(); this allows us to use + * RCU-sched lookups of the address from any context. + * + * Because modules have two address ranges: init and core, we need two + * latch_tree_nodes entries. Therefore we need the back-pointer from + * mod_tree_node. + * + * Because init ranges are short lived we mark them unlikely and have placed + * them outside the critical cacheline in struct module. 
+ */ + +static __always_inline unsigned long __mod_tree_val(struct latch_tree_node *n) +{ + struct mod_tree_node *mtn = container_of(n, struct mod_tree_node, node); + struct module *mod = mtn->mod; + + if (unlikely(mtn == &mod->mtn_init)) + return (unsigned long)mod->module_init; + + return (unsigned long)mod->module_core; +} + +static __always_inline unsigned long __mod_tree_size(struct latch_tree_node *n) +{ + struct mod_tree_node *mtn = container_of(n, struct mod_tree_node, node); + struct module *mod = mtn->mod; + + if (unlikely(mtn == &mod->mtn_init)) + return (unsigned long)mod->init_size; + + return (unsigned long)mod->core_size; +} + +static __always_inline bool +mod_tree_less(struct latch_tree_node *a, struct latch_tree_node *b) +{ + return __mod_tree_val(a) < __mod_tree_val(b); +} + +static __always_inline int +mod_tree_comp(void *key, struct latch_tree_node *n) +{ + unsigned long val = (unsigned long)key; + unsigned long start, end; + + start = __mod_tree_val(n); + if (val < start) + return -1; + + end = start + __mod_tree_size(n); + if (val >= end) + return 1; + + return 0; +} + +static const struct latch_tree_ops mod_tree_ops = { + .less = mod_tree_less, + .comp = mod_tree_comp, +}; + +static struct latch_tree_root mod_tree __cacheline_aligned; + +/* + * These modifications: insert, remove_init and remove; are serialized by the + * module_mutex. 
+ */ +static void mod_tree_insert(struct module *mod) +{ + mod->mtn_core.mod = mod; + mod->mtn_init.mod = mod; + + latch_tree_insert(&mod->mtn_core.node, &mod_tree, &mod_tree_ops); + if (mod->init_size) + latch_tree_insert(&mod->mtn_init.node, &mod_tree, &mod_tree_ops); +} + +static void mod_tree_remove_init(struct module *mod) +{ + if (mod->init_size) + latch_tree_erase(&mod->mtn_init.node, &mod_tree, &mod_tree_ops); +} + +static void mod_tree_remove(struct module *mod) +{ + latch_tree_erase(&mod->mtn_core.node, &mod_tree, &mod_tree_ops); + mod_tree_remove_init(mod); +} + +static struct module *mod_tree_find(unsigned long addr) +{ + struct latch_tree_node *ltn; + + ltn = latch_tree_find((void *)addr, &mod_tree, &mod_tree_ops); + if (!ltn) + return NULL; + + return container_of(ltn, struct mod_tree_node, node)->mod; +} + #ifdef CONFIG_KGDB_KDB struct list_head *kdb_modules = &modules; /* kdb needs the list of modules */ #endif /* CONFIG_KGDB_KDB */ @@ -1974,6 +2076,7 @@ static void free_module(struct module *mod) /* Now we can delete it from the lists */ mutex_lock(&module_mutex); stop_machine(__unlink_module, mod, NULL); + mod_tree_remove(mod); mutex_unlock(&module_mutex); mutex_lock(&module_ext_mutex); @@ -3274,6 +3377,7 @@ static int do_init_module(struct module *mod) mod->symtab = mod->core_symtab; mod->strtab = mod->core_strtab; #endif + mod_tree_remove_init(mod); unset_module_init_ro_nx(mod); module_free(mod, mod->module_init); mod->module_init = NULL; @@ -3323,6 +3427,7 @@ static int add_unformed_module(struct module *mod) goto out; } list_add_rcu(&mod->list, &modules); + mod_tree_insert(mod); err = 0; out: @@ -3987,14 +4092,14 @@ struct module *__module_address(unsigned long addr) if (addr < module_addr_min || addr > module_addr_max) return NULL; - list_for_each_entry_rcu(mod, &modules, list) { + mod = mod_tree_find(addr); + if (mod) { + BUG_ON(!(within_module_core(addr, mod) + || within_module_init(addr, mod))); if (mod->state == MODULE_STATE_UNFORMED) - 
continue; - if (within_module_core(addr, mod) - || within_module_init(addr, mod)) - return mod; + mod = NULL; } - return NULL; + return mod; } EXPORT_SYMBOL_GPL(__module_address); _______________________________________________ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel