Hi Jarkko, On Sun, 24 Mar 2024 01:29:08 +0200 Jarkko Sakkinen <jar...@kernel.org> wrote:
> Tracing with kprobes while running a monolithic kernel is currently > impossible due the kernel module allocator dependency. > > Address the issue by allowing architectures to implement module_alloc() > and module_memfree() independent of the module subsystem. An arch tree > can signal this by setting HAVE_KPROBES_ALLOC in its Kconfig file. > > Realize the feature on RISC-V by separating allocator to module_alloc.c > and implementing module_memfree(). Even so, this involves changes in the arch-independent part, so it should be solved in a generic way. Did you check Calvin's thread? https://lore.kernel.org/all/cover.1709676663.git.jcalvinow...@gmail.com/ I think we'd better introduce `alloc_execmem()`, CONFIG_HAVE_ALLOC_EXECMEM and CONFIG_ALLOC_EXECMEM first config HAVE_ALLOC_EXECMEM bool config ALLOC_EXECMEM bool "Executable trampoline memory allocation" depends on MODULES || HAVE_ALLOC_EXECMEM And define a fallback macro to module_alloc() like this. #ifndef CONFIG_HAVE_ALLOC_EXECMEM #define alloc_execmem(size, gfp) module_alloc(size) #endif Then, introduce a new dependency to kprobes config KPROBES bool "Kprobes" select ALLOC_EXECMEM and update kprobes to use alloc_execmem and remove module-related code from it. You should also consider using IS_ENABLED(CONFIG_MODULES) in the code to avoid using #ifdefs. Finally, you can add the RISC-V implementation patch of HAVE_ALLOC_EXECMEM in the next patch. Thank you, > > Link: https://www.sochub.fi # for power on testing new SoC's with a minimal > stack > Link: > https://lore.kernel.org/all/20220608000014.3054333-1-jar...@profian.com/ # > continuation > Signed-off-by: Jarkko Sakkinen <jar...@kernel.org> > --- > v2: > - Better late than never right? :-) > - Focus only to RISC-V for now to make the patch more digestable. This > is the arch where I use the patch on a daily basis to help with QA. > - Introduce HAVE_KPROBES_ALLOC flag to help with more gradual migration. 
> --- > arch/Kconfig | 8 +++++++- > arch/riscv/Kconfig | 1 + > arch/riscv/kernel/Makefile | 5 +++++ > arch/riscv/kernel/module.c | 11 ----------- > arch/riscv/kernel/module_alloc.c | 28 ++++++++++++++++++++++++++++ > kernel/kprobes.c | 10 ++++++++++ > kernel/trace/trace_kprobe.c | 18 ++++++++++++++++-- > 7 files changed, 67 insertions(+), 14 deletions(-) > create mode 100644 arch/riscv/kernel/module_alloc.c > > diff --git a/arch/Kconfig b/arch/Kconfig > index a5af0edd3eb8..c931f1de98a7 100644 > --- a/arch/Kconfig > +++ b/arch/Kconfig > @@ -52,7 +52,7 @@ config GENERIC_ENTRY > > config KPROBES > bool "Kprobes" > - depends on MODULES > + depends on MODULES || HAVE_KPROBES_ALLOC > depends on HAVE_KPROBES > select KALLSYMS > select TASKS_RCU if PREEMPTION > @@ -215,6 +215,12 @@ config HAVE_OPTPROBES > config HAVE_KPROBES_ON_FTRACE > bool > > +config HAVE_KPROBES_ALLOC > + bool > + help > + Architectures that select this option are capable of allocating memory > + for kprobes withou the kernel module allocator. 
> + > config ARCH_CORRECT_STACKTRACE_ON_KRETPROBE > bool > help > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig > index e3142ce531a0..4f1b925e83d8 100644 > --- a/arch/riscv/Kconfig > +++ b/arch/riscv/Kconfig > @@ -132,6 +132,7 @@ config RISCV > select HAVE_KPROBES if !XIP_KERNEL > select HAVE_KPROBES_ON_FTRACE if !XIP_KERNEL > select HAVE_KRETPROBES if !XIP_KERNEL > + select HAVE_KPROBES_ALLOC if !XIP_KERNEL > # https://github.com/ClangBuiltLinux/linux/issues/1881 > select HAVE_LD_DEAD_CODE_DATA_ELIMINATION if !LD_IS_LLD > select HAVE_MOVE_PMD > diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile > index 604d6bf7e476..46318194bce1 100644 > --- a/arch/riscv/kernel/Makefile > +++ b/arch/riscv/kernel/Makefile > @@ -73,6 +73,11 @@ obj-$(CONFIG_SMP) += cpu_ops.o > > obj-$(CONFIG_RISCV_BOOT_SPINWAIT) += cpu_ops_spinwait.o > obj-$(CONFIG_MODULES) += module.o > +ifeq ($(CONFIG_MODULES),y) > +obj-y += module_alloc.o > +else > +obj-$(CONFIG_KPROBES) += module_alloc.o > +endif > obj-$(CONFIG_MODULE_SECTIONS) += module-sections.o > > obj-$(CONFIG_CPU_PM) += suspend_entry.o suspend.o > diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c > index 5e5a82644451..cc324b450f2e 100644 > --- a/arch/riscv/kernel/module.c > +++ b/arch/riscv/kernel/module.c > @@ -905,17 +905,6 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char > *strtab, > return 0; > } > > -#if defined(CONFIG_MMU) && defined(CONFIG_64BIT) > -void *module_alloc(unsigned long size) > -{ > - return __vmalloc_node_range(size, 1, MODULES_VADDR, > - MODULES_END, GFP_KERNEL, > - PAGE_KERNEL, VM_FLUSH_RESET_PERMS, > - NUMA_NO_NODE, > - __builtin_return_address(0)); > -} > -#endif > - > int module_finalize(const Elf_Ehdr *hdr, > const Elf_Shdr *sechdrs, > struct module *me) > diff --git a/arch/riscv/kernel/module_alloc.c > b/arch/riscv/kernel/module_alloc.c > new file mode 100644 > index 000000000000..3d9aa8dbca8a > --- /dev/null > +++ b/arch/riscv/kernel/module_alloc.c > @@ -0,0 
+1,28 @@ > +// SPDX-License-Identifier: GPL-2.0-or-later > +/* > + * Copyright (c) 2017 Zihao Yu > + * Copyright (c) 2024 Jarkko Sakkinen > + */ > + > +#include <linux/mm.h> > +#include <linux/moduleloader.h> > +#include <linux/vmalloc.h> > +#include <asm/sections.h> > + > +#if defined(CONFIG_MMU) && defined(CONFIG_64BIT) > +void *module_alloc(unsigned long size) > +{ > + return __vmalloc_node_range(size, 1, MODULES_VADDR, > + MODULES_END, GFP_KERNEL, > + PAGE_KERNEL, 0, NUMA_NO_NODE, > + __builtin_return_address(0)); > +} > + > +void module_memfree(void *module_region) > +{ > + if (in_interrupt()) > + pr_warn("In interrupt context: vmalloc may not work.\n"); > + > + vfree(module_region); > +} > +#endif > diff --git a/kernel/kprobes.c b/kernel/kprobes.c > index 9d9095e81792..2c583ab6efc4 100644 > --- a/kernel/kprobes.c > +++ b/kernel/kprobes.c > @@ -1580,6 +1580,7 @@ static int check_kprobe_address_safe(struct kprobe *p, > goto out; > } > > +#ifdef CONFIG_MODULES > /* Check if 'p' is probing a module. */ > *probed_mod = __module_text_address((unsigned long) p->addr); > if (*probed_mod) { > @@ -1603,6 +1604,8 @@ static int check_kprobe_address_safe(struct kprobe *p, > ret = -ENOENT; > } > } > +#endif > + > out: > preempt_enable(); > jump_label_unlock(); > @@ -2482,6 +2485,7 @@ int kprobe_add_area_blacklist(unsigned long start, > unsigned long end) > return 0; > } > > +#ifdef CONFIG_MODULES > /* Remove all symbols in given area from kprobe blacklist */ > static void kprobe_remove_area_blacklist(unsigned long start, unsigned long > end) > { > @@ -2499,6 +2503,7 @@ static void kprobe_remove_ksym_blacklist(unsigned long > entry) > { > kprobe_remove_area_blacklist(entry, entry + 1); > } > +#endif /* CONFIG_MODULES */ > > int __weak arch_kprobe_get_kallsym(unsigned int *symnum, unsigned long > *value, > char *type, char *sym) > @@ -2564,6 +2569,7 @@ static int __init populate_kprobe_blacklist(unsigned > long *start, > return ret ? 
: arch_populate_kprobe_blacklist(); > } > > +#ifdef CONFIG_MODULES > static void add_module_kprobe_blacklist(struct module *mod) > { > unsigned long start, end; > @@ -2665,6 +2671,7 @@ static struct notifier_block kprobe_module_nb = { > .notifier_call = kprobes_module_callback, > .priority = 0 > }; > +#endif /* CONFIG_MODULES */ > > void kprobe_free_init_mem(void) > { > @@ -2724,8 +2731,11 @@ static int __init init_kprobes(void) > err = arch_init_kprobes(); > if (!err) > err = register_die_notifier(&kprobe_exceptions_nb); > + > +#ifdef CONFIG_MODULES > if (!err) > err = register_module_notifier(&kprobe_module_nb); > +#endif > > kprobes_initialized = (err == 0); > kprobe_sysctls_init(); > diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c > index c4c6e0e0068b..f8fbd5e76dda 100644 > --- a/kernel/trace/trace_kprobe.c > +++ b/kernel/trace/trace_kprobe.c > @@ -111,6 +111,7 @@ static nokprobe_inline bool > trace_kprobe_within_module(struct trace_kprobe *tk, > return strncmp(module_name(mod), name, len) == 0 && name[len] == ':'; > } > > +#ifdef CONFIG_MODULES > static nokprobe_inline bool trace_kprobe_module_exist(struct trace_kprobe > *tk) > { > char *p; > @@ -129,6 +130,7 @@ static nokprobe_inline bool > trace_kprobe_module_exist(struct trace_kprobe *tk) > > return ret; > } > +#endif /* CONFIG_MODULES */ > > static bool trace_kprobe_is_busy(struct dyn_event *ev) > { > @@ -608,7 +610,11 @@ static int append_trace_kprobe(struct trace_kprobe *tk, > struct trace_kprobe *to) > > /* Register k*probe */ > ret = __register_trace_kprobe(tk); > - if (ret == -ENOENT && !trace_kprobe_module_exist(tk)) { > +#ifdef CONFIG_MODULES > + if (ret == -ENOENT && trace_kprobe_module_exist(tk)) > + ret = 0; > +#endif /* CONFIG_MODULES */ > + if (ret == -ENOENT) { > pr_warn("This probe might be able to register after target > module is loaded. 
Continue.\n"); > ret = 0; > } > @@ -655,7 +661,11 @@ static int register_trace_kprobe(struct trace_kprobe *tk) > > /* Register k*probe */ > ret = __register_trace_kprobe(tk); > - if (ret == -ENOENT && !trace_kprobe_module_exist(tk)) { > +#ifdef CONFIG_MODULES > + if (ret == -ENOENT && trace_kprobe_module_exist(tk)) > + ret = 0; > +#endif /* CONFIG_MODULES */ > + if (ret == -ENOENT) { > pr_warn("This probe might be able to register after target > module is loaded. Continue.\n"); > ret = 0; > } > @@ -670,6 +680,7 @@ static int register_trace_kprobe(struct trace_kprobe *tk) > return ret; > } > > +#ifdef CONFIG_MODULES > /* Module notifier call back, checking event on the module */ > static int trace_kprobe_module_callback(struct notifier_block *nb, > unsigned long val, void *data) > @@ -704,6 +715,7 @@ static struct notifier_block trace_kprobe_module_nb = { > .notifier_call = trace_kprobe_module_callback, > .priority = 1 /* Invoked after kprobe module callback */ > }; > +#endif /* CONFIG_MODULES */ > > static int count_symbols(void *data, unsigned long unused) > { > @@ -1897,8 +1909,10 @@ static __init int init_kprobe_trace_early(void) > if (ret) > return ret; > > +#ifdef CONFIG_MODULES > if (register_module_notifier(&trace_kprobe_module_nb)) > return -EINVAL; > +#endif /* CONFIG_MODULES */ > > return 0; > } > -- > 2.44.0 > -- Masami Hiramatsu (Google) <mhira...@kernel.org>