From: Josh Poimboeuf <[email protected]>

Add infrastructure for an arch-specific CONFIG_HAVE_STATIC_CALL_INLINE
option, which is a faster version of CONFIG_HAVE_STATIC_CALL.  At
runtime, the static call sites are patched directly, rather than using
the out-of-line trampolines.
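
For reference, usage mirrors the example in the comments of
include/linux/static_call.h; roughly (func_a(), func_b() and 'my_key'
are illustrative names, not part of this patch):

  /* Start with the following function declarations: */
  int func_a(int arg1, int arg2);
  int func_b(int arg1, int arg2);

  /* Define 'my_key', associated with func_a() by default: */
  DEFINE_STATIC_CALL(my_key, func_a);

  /* Call func_a(): */
  static_call(my_key, arg1, arg2);

  /* Update 'my_key' to point to func_b(), patching all call sites: */
  static_call_update(my_key, func_b);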

Compared to out-of-line static calls, the performance benefits are more
modest, but still measurable.  Steven Rostedt did some tracepoint
measurements:

  https://lkml.kernel.org/r/[email protected]

This code is heavily inspired by the jump label code (aka "static
jumps"), as some of the concepts are very similar.

For more details, see the comments in include/linux/static_call.h.

Cc: [email protected]
Cc: Steven Rostedt <[email protected]>
Cc: Julia Cartwright <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Ard Biesheuvel <[email protected]>
Cc: Jason Baron <[email protected]>
Cc: Rasmus Villemoes <[email protected]>
Cc: Daniel Bristot de Oliveira <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Jiri Kosina <[email protected]>
Cc: Edward Cree <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Masami Hiramatsu <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: David Laight <[email protected]>
Cc: Jessica Yu <[email protected]>
Cc: Nadav Amit <[email protected]>
Cc: Andy Lutomirski <[email protected]>
Cc: "H. Peter Anvin" <[email protected]>
Signed-off-by: Josh Poimboeuf <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Link: https://lkml.kernel.org/r/c70ea8c00b93dadcb97b9d83659cf204121372d6.1547073843.git.jpoim...@redhat.com
---
 arch/Kconfig                      |    4 
 include/asm-generic/vmlinux.lds.h |    7 
 include/linux/module.h            |   10 +
 include/linux/static_call.h       |   63 +++++++
 include/linux/static_call_types.h |    9 +
 kernel/Makefile                   |    1 
 kernel/module.c                   |    5 
 kernel/static_call.c              |  316 ++++++++++++++++++++++++++++++++++++++
 8 files changed, 414 insertions(+), 1 deletion(-)
 create mode 100644 kernel/static_call.c

--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -930,6 +930,10 @@ config LOCK_EVENT_COUNTS
 config HAVE_STATIC_CALL
 	bool
 
+config HAVE_STATIC_CALL_INLINE
+	bool
+	depends on HAVE_STATIC_CALL
+
 source "kernel/gcov/Kconfig"
 
 source "scripts/gcc-plugins/Kconfig"

--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -311,6 +311,12 @@
 		KEEP(*(__jump_table))					\
 		__stop___jump_table = .;
 
+#define STATIC_CALL_DATA						\
+	. = ALIGN(8);							\
+	__start_static_call_sites = .;					\
+	KEEP(*(.static_call_sites))					\
+	__stop_static_call_sites = .;
+
 /*
  * Allow architectures to handle ro_after_init data on their
  * own by defining an empty RO_AFTER_INIT_DATA.
@@ -320,6 +326,7 @@
 	__start_ro_after_init = .;					\
 	*(.data..ro_after_init)						\
 	JUMP_TABLE_DATA							\
+	STATIC_CALL_DATA						\
 	__end_ro_after_init = .;
 #endif

--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -21,6 +21,7 @@
 #include <linux/rbtree_latch.h>
 #include <linux/error-injection.h>
 #include <linux/tracepoint-defs.h>
+#include <linux/static_call_types.h>
 
 #include <linux/percpu.h>
 #include <asm/module.h>
@@ -472,6 +473,10 @@ struct module {
 	unsigned int num_ftrace_callsites;
 	unsigned long *ftrace_callsites;
 #endif
+#ifdef CONFIG_HAVE_STATIC_CALL_INLINE
+	int num_static_call_sites;
+	struct static_call_site *static_call_sites;
+#endif
 
 #ifdef CONFIG_LIVEPATCH
 	bool klp; /* Is this a livepatch module? */
@@ -728,6 +733,11 @@ static inline bool within_module(unsigne
 {
 	return false;
 }
+
+static inline bool within_module_init(unsigned long addr, const struct module *mod)
+{
+	return false;
+}
 
 /* Get/put a kernel symbol (calls should be symmetric) */
 #define symbol_get(x) ({ extern typeof(x) x __attribute__((weak)); &(x); })

--- a/include/linux/static_call.h
+++ b/include/linux/static_call.h
@@ -47,6 +47,12 @@
  * Each static_call() site calls into a trampoline associated with the key.
 * The trampoline has a direct branch to the default function.  Updates to a
 * key will modify the trampoline's branch destination.
+ *
+ * If the arch has CONFIG_HAVE_STATIC_CALL_INLINE, then the call sites
+ * themselves will be patched at runtime to call the functions directly,
+ * rather than calling through the trampoline.  This requires objtool or a
+ * compiler plugin to detect all the static_call() sites and annotate them
+ * in the .static_call_sites section.
  */
 
 #include <linux/types.h>
@@ -64,7 +70,62 @@ extern void arch_static_call_transform(v
 	extern typeof(func) STATIC_CALL_TRAMP(key)
 
-#if defined(CONFIG_HAVE_STATIC_CALL)
+#ifdef CONFIG_HAVE_STATIC_CALL_INLINE
+
+struct static_call_key {
+	void *func, *tramp;
+	/*
+	 * List of modules (including vmlinux) and their call sites associated
+	 * with this key.
+	 */
+	struct list_head site_mods;
+};
+
+struct static_call_mod {
+	struct list_head list;
+	struct module *mod; /* for vmlinux, mod == NULL */
+	struct static_call_site *sites;
+};
+
+extern void __static_call_update(struct static_call_key *key, void *func);
+extern int static_call_mod_init(struct module *mod);
+
+#define DEFINE_STATIC_CALL(key, _func)					\
+	DECLARE_STATIC_CALL(key, _func);				\
+	struct static_call_key key = {					\
+		.func = _func,						\
+		.tramp = STATIC_CALL_TRAMP(key),			\
+		.site_mods = LIST_HEAD_INIT(key.site_mods),		\
+	};								\
+	ARCH_DEFINE_STATIC_CALL_TRAMP(key, _func)
+
+/*
+ * __ADDRESSABLE() is used to ensure the key symbol doesn't get stripped from
+ * the symbol table so objtool can reference it when it generates the
+ * static_call_site structs.
+ */
+#define static_call(key, args...)					\
+({									\
+	__ADDRESSABLE(key);						\
+	STATIC_CALL_TRAMP(key)(args);					\
+})
+
+#define static_call_update(key, func)					\
+({									\
+	BUILD_BUG_ON(!__same_type(func, STATIC_CALL_TRAMP(key)));	\
+	__static_call_update(&key, func);				\
+})
+
+#define EXPORT_STATIC_CALL(key)						\
+	EXPORT_SYMBOL(key);						\
+	EXPORT_SYMBOL(STATIC_CALL_TRAMP(key))
+
+#define EXPORT_STATIC_CALL_GPL(key)					\
+	EXPORT_SYMBOL_GPL(key);						\
+	EXPORT_SYMBOL_GPL(STATIC_CALL_TRAMP(key))
+
+
+#elif defined(CONFIG_HAVE_STATIC_CALL)
 
 struct static_call_key {
 	void *func, *tramp;

--- a/include/linux/static_call_types.h
+++ b/include/linux/static_call_types.h
@@ -10,4 +10,13 @@
 #define STATIC_CALL_TRAMP(key)	    __PASTE(STATIC_CALL_TRAMP_PREFIX, key)
 #define STATIC_CALL_TRAMP_STR(key)  __stringify(STATIC_CALL_TRAMP(key))
 
+/*
+ * The static call site table needs to be created by external tooling (objtool
+ * or a compiler plugin).
+ */
+struct static_call_site {
+	s32 addr;
+	s32 key;
+};
+
 #endif /* _STATIC_CALL_TYPES_H */

--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -100,6 +100,7 @@ obj-$(CONFIG_TRACEPOINTS) += trace/
 obj-$(CONFIG_IRQ_WORK) += irq_work.o
 obj-$(CONFIG_CPU_PM) += cpu_pm.o
 obj-$(CONFIG_BPF) += bpf/
+obj-$(CONFIG_HAVE_STATIC_CALL_INLINE) += static_call.o
 
 obj-$(CONFIG_PERF_EVENTS) += events/

--- a/kernel/module.c
+++ b/kernel/module.c
@@ -3117,6 +3117,11 @@ static int find_module_sections(struct m
 					    sizeof(*mod->ei_funcs),
 					    &mod->num_ei_funcs);
 #endif
+#ifdef CONFIG_HAVE_STATIC_CALL_INLINE
+	mod->static_call_sites = section_objs(info, ".static_call_sites",
+					      sizeof(*mod->static_call_sites),
+					      &mod->num_static_call_sites);
+#endif
 
 	mod->extable = section_objs(info, "__ex_table",
 				    sizeof(*mod->extable), &mod->num_exentries);

--- /dev/null
+++ b/kernel/static_call.c
@@ -0,0 +1,316 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/init.h>
+#include <linux/static_call.h>
+#include <linux/bug.h>
+#include <linux/smp.h>
+#include <linux/sort.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/cpu.h>
+#include <linux/processor.h>
+#include <asm/sections.h>
+
+extern struct static_call_site __start_static_call_sites[],
+			       __stop_static_call_sites[];
+
+static bool static_call_initialized;
+
+#define STATIC_CALL_INIT 1UL
+
+/* mutex to protect key modules/sites */
+static DEFINE_MUTEX(static_call_mutex);
+
+static void static_call_lock(void)
+{
+	mutex_lock(&static_call_mutex);
+}
+
+static void static_call_unlock(void)
+{
+	mutex_unlock(&static_call_mutex);
+}
+
+static inline void *static_call_addr(struct static_call_site *site)
+{
+	return (void *)((long)site->addr + (long)&site->addr);
+}
+
+
+static inline struct static_call_key *static_call_key(const struct static_call_site *site)
+{
+	return (struct static_call_key *)
+		(((long)site->key + (long)&site->key) & ~STATIC_CALL_INIT);
+}
+
+/* These assume the key is word-aligned. */
+static inline bool static_call_is_init(struct static_call_site *site)
+{
+	return ((long)site->key + (long)&site->key) & STATIC_CALL_INIT;
+}
+
+static inline void static_call_set_init(struct static_call_site *site)
+{
+	site->key = ((long)static_call_key(site) | STATIC_CALL_INIT) -
+		    (long)&site->key;
+}
+
+static int static_call_site_cmp(const void *_a, const void *_b)
+{
+	const struct static_call_site *a = _a;
+	const struct static_call_site *b = _b;
+	const struct static_call_key *key_a = static_call_key(a);
+	const struct static_call_key *key_b = static_call_key(b);
+
+	if (key_a < key_b)
+		return -1;
+
+	if (key_a > key_b)
+		return 1;
+
+	return 0;
+}
+
+static void static_call_site_swap(void *_a, void *_b, int size)
+{
+	long delta = (unsigned long)_a - (unsigned long)_b;
+	struct static_call_site *a = _a;
+	struct static_call_site *b = _b;
+	struct static_call_site tmp = *a;
+
+	a->addr = b->addr - delta;
+	a->key  = b->key  - delta;
+
+	b->addr = tmp.addr + delta;
+	b->key  = tmp.key  + delta;
+}
+
+static inline void static_call_sort_entries(struct static_call_site *start,
+					    struct static_call_site *stop)
+{
+	sort(start, stop - start, sizeof(struct static_call_site),
+	     static_call_site_cmp, static_call_site_swap);
+}
+
+void __static_call_update(struct static_call_key *key, void *func)
+{
+	struct static_call_mod *site_mod;
+	struct static_call_site *site, *stop;
+
+	cpus_read_lock();
+	static_call_lock();
+
+	if (key->func == func)
+		goto done;
+
+	key->func = func;
+
+	/*
+	 * If called before init, leave the call sites unpatched for now.
+	 * In the meantime they'll continue to call the temporary trampoline.
+	 */
+	if (!static_call_initialized)
+		goto done;
+
+	list_for_each_entry(site_mod, &key->site_mods, list) {
+		if (!site_mod->sites) {
+			/*
+			 * This can happen if the static call key is defined in
+			 * a module which doesn't use it.
+			 */
+			continue;
+		}
+
+		stop = __stop_static_call_sites;
+
+#ifdef CONFIG_MODULES
+		if (site_mod->mod) {
+			stop = site_mod->mod->static_call_sites +
+			       site_mod->mod->num_static_call_sites;
+		}
+#endif
+
+		for (site = site_mod->sites;
+		     site < stop && static_call_key(site) == key; site++) {
+			void *site_addr = static_call_addr(site);
+			struct module *mod = site_mod->mod;
+
+			if (static_call_is_init(site)) {
+				/*
+				 * Don't write to call sites which were in
+				 * initmem and have since been freed.
+				 */
+				if (!mod && system_state >= SYSTEM_RUNNING)
+					continue;
+				if (mod && !within_module_init((unsigned long)site_addr, mod))
+					continue;
+			}
+
+			if (!kernel_text_address((unsigned long)site_addr)) {
+				WARN_ONCE(1, "can't patch static call site at %pS",
+					  site_addr);
+				continue;
+			}
+
+			arch_static_call_transform(site_addr, key->tramp, func);
+		}
+	}
+
+done:
+	static_call_unlock();
+	cpus_read_unlock();
+}
+EXPORT_SYMBOL_GPL(__static_call_update);
+
+#ifdef CONFIG_MODULES
+
+static int static_call_add_module(struct module *mod)
+{
+	struct static_call_site *start = mod->static_call_sites;
+	struct static_call_site *stop = mod->static_call_sites +
+					mod->num_static_call_sites;
+	struct static_call_site *site;
+	struct static_call_key *key, *prev_key = NULL;
+	struct static_call_mod *site_mod;
+
+	if (start == stop)
+		return 0;
+
+	static_call_sort_entries(start, stop);
+
+	for (site = start; site < stop; site++) {
+		void *site_addr = static_call_addr(site);
+
+		if (within_module_init((unsigned long)site_addr, mod))
+			static_call_set_init(site);
+
+		key = static_call_key(site);
+		if (key != prev_key) {
+			prev_key = key;
+
+			site_mod = kzalloc(sizeof(*site_mod), GFP_KERNEL);
+			if (!site_mod)
+				return -ENOMEM;
+
+			site_mod->mod = mod;
+			site_mod->sites = site;
+			list_add_tail(&site_mod->list, &key->site_mods);
+		}
+
+		arch_static_call_transform(site_addr, key->tramp, key->func);
+	}
+
+	return 0;
+}
+
+static void static_call_del_module(struct module *mod)
+{
+	struct static_call_site *start = mod->static_call_sites;
+	struct static_call_site *stop = mod->static_call_sites +
+					mod->num_static_call_sites;
+	struct static_call_site *site;
+	struct static_call_key *key, *prev_key = NULL;
+	struct static_call_mod *site_mod;
+
+	for (site = start; site < stop; site++) {
+		key = static_call_key(site);
+		if (key == prev_key)
+			continue;
+		prev_key = key;
+
+		list_for_each_entry(site_mod, &key->site_mods, list) {
+			if (site_mod->mod == mod) {
+				list_del(&site_mod->list);
+				kfree(site_mod);
+				break;
+			}
+		}
+	}
+}
+
+static int static_call_module_notify(struct notifier_block *nb,
+				     unsigned long val, void *data)
+{
+	struct module *mod = data;
+	int ret = 0;
+
+	cpus_read_lock();
+	static_call_lock();
+
+	switch (val) {
+	case MODULE_STATE_COMING:
+		module_disable_ro(mod);
+		ret = static_call_add_module(mod);
+		module_enable_ro(mod, false);
+		if (ret) {
+			WARN(1, "Failed to allocate memory for static calls");
+			static_call_del_module(mod);
+		}
+		break;
+	case MODULE_STATE_GOING:
+		static_call_del_module(mod);
+		break;
+	}
+
+	static_call_unlock();
+	cpus_read_unlock();
+
+	return notifier_from_errno(ret);
+}
+
+static struct notifier_block static_call_module_nb = {
+	.notifier_call = static_call_module_notify,
+}; + +#endif /* CONFIG_MODULES */ + +static void __init static_call_init(void) +{ + struct static_call_site *start = __start_static_call_sites; + struct static_call_site *stop = __stop_static_call_sites; + struct static_call_site *site; + + if (start == stop) { + pr_warn("WARNING: empty static call table\n"); + return; + } + + cpus_read_lock(); + static_call_lock(); + + static_call_sort_entries(start, stop); + + for (site = start; site < stop; site++) { + struct static_call_key *key = static_call_key(site); + void *site_addr = static_call_addr(site); + + if (init_section_contains(site_addr, 1)) + static_call_set_init(site); + + if (list_empty(&key->site_mods)) { + struct static_call_mod *site_mod; + + site_mod = kzalloc(sizeof(*site_mod), GFP_KERNEL); + if (!site_mod) { + WARN(1, "Failed to allocate memory for static calls"); + goto done; + } + + site_mod->sites = site; + list_add_tail(&site_mod->list, &key->site_mods); + } + + arch_static_call_transform(site_addr, key->tramp, key->func); + } + + static_call_initialized = true; + +done: + static_call_unlock(); + cpus_read_unlock(); + +#ifdef CONFIG_MODULES + if (static_call_initialized) + register_module_notifier(&static_call_module_nb); +#endif +} +early_initcall(static_call_init);

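Also worth spelling out: static_call_init() and static_call_add_module()
sort their site tables by key first, which makes each key's sites one
contiguous slice. A key then only records where its slice starts
(site_mod->sites), and __static_call_update() stops walking at the first
entry whose key differs. A simplified standalone model of that walk
(plain pointers instead of self-relative offsets; update_one_key() and
all names are illustrative):

  #include <stdio.h>

  /* Simplified model of a site table: one contiguous slice per key. */
  struct site {
  	const void *key;
  	const char *name;	/* stands in for the call-site address */
  };

  /*
   * Walk one key's slice: start where the slice was recorded and stop
   * at the first entry with a different key (valid because the table
   * was sorted by key).
   */
  static void update_one_key(const struct site *first,
  			   const struct site *end, const void *key)
  {
  	const struct site *s;

  	for (s = first; s < end && s->key == key; s++)
  		printf("patch %s\n", s->name);	/* arch_static_call_transform() here */
  }

  int main(void)
  {
  	int key_a, key_b;		/* dummy keys; only addresses matter */
  	const struct site table[] = {	/* each key's sites are contiguous */
  		{ &key_a, "site1" },
  		{ &key_a, "site2" },
  		{ &key_b, "site3" },
  	};

  	update_one_key(&table[0], &table[3], &key_a);	/* patches site1, site2 */
  	return 0;
  }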
