On Monday 03/25 at 11:56 +0900, Masami Hiramatsu wrote:
> Hi Jarkko,
> 
> On Sun, 24 Mar 2024 01:29:08 +0200
> Jarkko Sakkinen <jar...@kernel.org> wrote:
> 
> > Tracing with kprobes while running a monolithic kernel is currently
> > impossible due to the kernel module allocator dependency.
> > 
> > Address the issue by allowing architectures to implement module_alloc()
> > and module_memfree() independent of the module subsystem. An arch tree
> > can signal this by setting HAVE_KPROBES_ALLOC in its Kconfig file.
> > 
> > Realize the feature on RISC-V by separating allocator to module_alloc.c
> > and implementing module_memfree().
> 
> Even so, this involves changes in the arch-independent part, so it should
> be solved in a generic way. Did you check Calvin's thread?
> 
> https://lore.kernel.org/all/cover.1709676663.git.jcalvinow...@gmail.com/

FYI, I should have v2 of that series out later this week.

Thanks,
Calvin

> I think we'd better introduce `alloc_execmem()`,
> CONFIG_HAVE_ALLOC_EXECMEM and CONFIG_ALLOC_EXECMEM first:
> 
>   config HAVE_ALLOC_EXECMEM
>       bool
> 
>   config ALLOC_EXECMEM
>       bool "Executable trampoline memory allocation"
>       depends on MODULES || HAVE_ALLOC_EXECMEM
> 
> And define a fallback macro to module_alloc(), like this:
> 
> #ifndef CONFIG_HAVE_ALLOC_EXECMEM
> #define alloc_execmem(size, gfp)      module_alloc(size)
> #endif
> 
> Then, introduce a new dependency to kprobes
> 
>   config KPROBES
>       bool "Kprobes"
>       select ALLOC_EXECMEM
> 
> and update kprobes to use alloc_execmem and remove module related
> code from it.
> 
> You should also consider using IS_ENABLED(CONFIG_MODULES) in the code to
> avoid using #ifdefs.
> 
> Finally, you can add the RISC-V implementation of HAVE_ALLOC_EXECMEM in the
> next patch.
> 
> Thank you,
> 
> 
> > 
> > Link: https://www.sochub.fi # for power on testing new SoC's with a minimal stack
> > Link: https://lore.kernel.org/all/20220608000014.3054333-1-jar...@profian.com/ # continuation
> > Signed-off-by: Jarkko Sakkinen <jar...@kernel.org>
> > ---
> > v2:
> > - Better late than never right? :-)
> > - Focus only on RISC-V for now to make the patch more digestible. This
> >   is the arch where I use the patch on a daily basis to help with QA.
> > - Introduce HAVE_KPROBES_ALLOC flag to help with more gradual migration.
> > ---
> >  arch/Kconfig                     |  8 +++++++-
> >  arch/riscv/Kconfig               |  1 +
> >  arch/riscv/kernel/Makefile       |  5 +++++
> >  arch/riscv/kernel/module.c       | 11 -----------
> >  arch/riscv/kernel/module_alloc.c | 28 ++++++++++++++++++++++++++++
> >  kernel/kprobes.c                 | 10 ++++++++++
> >  kernel/trace/trace_kprobe.c      | 18 ++++++++++++++++--
> >  7 files changed, 67 insertions(+), 14 deletions(-)
> >  create mode 100644 arch/riscv/kernel/module_alloc.c
> > 
> > diff --git a/arch/Kconfig b/arch/Kconfig
> > index a5af0edd3eb8..c931f1de98a7 100644
> > --- a/arch/Kconfig
> > +++ b/arch/Kconfig
> > @@ -52,7 +52,7 @@ config GENERIC_ENTRY
> >  
> >  config KPROBES
> >     bool "Kprobes"
> > -   depends on MODULES
> > +   depends on MODULES || HAVE_KPROBES_ALLOC
> >     depends on HAVE_KPROBES
> >     select KALLSYMS
> >     select TASKS_RCU if PREEMPTION
> > @@ -215,6 +215,12 @@ config HAVE_OPTPROBES
> >  config HAVE_KPROBES_ON_FTRACE
> >     bool
> >  
> > +config HAVE_KPROBES_ALLOC
> > +   bool
> > +   help
> > +     Architectures that select this option are capable of allocating memory
> > +     for kprobes without the kernel module allocator.
> > +
> >  config ARCH_CORRECT_STACKTRACE_ON_KRETPROBE
> >     bool
> >     help
> > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> > index e3142ce531a0..4f1b925e83d8 100644
> > --- a/arch/riscv/Kconfig
> > +++ b/arch/riscv/Kconfig
> > @@ -132,6 +132,7 @@ config RISCV
> >     select HAVE_KPROBES if !XIP_KERNEL
> >     select HAVE_KPROBES_ON_FTRACE if !XIP_KERNEL
> >     select HAVE_KRETPROBES if !XIP_KERNEL
> > +   select HAVE_KPROBES_ALLOC if !XIP_KERNEL
> >     # https://github.com/ClangBuiltLinux/linux/issues/1881
> >     select HAVE_LD_DEAD_CODE_DATA_ELIMINATION if !LD_IS_LLD
> >     select HAVE_MOVE_PMD
> > diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
> > index 604d6bf7e476..46318194bce1 100644
> > --- a/arch/riscv/kernel/Makefile
> > +++ b/arch/riscv/kernel/Makefile
> > @@ -73,6 +73,11 @@ obj-$(CONFIG_SMP)                += cpu_ops.o
> >  
> >  obj-$(CONFIG_RISCV_BOOT_SPINWAIT) += cpu_ops_spinwait.o
> >  obj-$(CONFIG_MODULES)              += module.o
> > +ifeq ($(CONFIG_MODULES),y)
> > +obj-y                              += module_alloc.o
> > +else
> > +obj-$(CONFIG_KPROBES)              += module_alloc.o
> > +endif
> >  obj-$(CONFIG_MODULE_SECTIONS)      += module-sections.o
> >  
> >  obj-$(CONFIG_CPU_PM)               += suspend_entry.o suspend.o
> > diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c
> > index 5e5a82644451..cc324b450f2e 100644
> > --- a/arch/riscv/kernel/module.c
> > +++ b/arch/riscv/kernel/module.c
> > @@ -905,17 +905,6 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char 
> > *strtab,
> >     return 0;
> >  }
> >  
> > -#if defined(CONFIG_MMU) && defined(CONFIG_64BIT)
> > -void *module_alloc(unsigned long size)
> > -{
> > -   return __vmalloc_node_range(size, 1, MODULES_VADDR,
> > -                               MODULES_END, GFP_KERNEL,
> > -                               PAGE_KERNEL, VM_FLUSH_RESET_PERMS,
> > -                               NUMA_NO_NODE,
> > -                               __builtin_return_address(0));
> > -}
> > -#endif
> > -
> >  int module_finalize(const Elf_Ehdr *hdr,
> >                 const Elf_Shdr *sechdrs,
> >                 struct module *me)
> > diff --git a/arch/riscv/kernel/module_alloc.c 
> > b/arch/riscv/kernel/module_alloc.c
> > new file mode 100644
> > index 000000000000..3d9aa8dbca8a
> > --- /dev/null
> > +++ b/arch/riscv/kernel/module_alloc.c
> > @@ -0,0 +1,28 @@
> > +// SPDX-License-Identifier: GPL-2.0-or-later
> > +/*
> > + *  Copyright (c) 2017 Zihao Yu
> > + *  Copyright (c) 2024 Jarkko Sakkinen
> > + */
> > +
> > +#include <linux/mm.h>
> > +#include <linux/moduleloader.h>
> > +#include <linux/vmalloc.h>
> > +#include <asm/sections.h>
> > +
> > +#if defined(CONFIG_MMU) && defined(CONFIG_64BIT)
> > +void *module_alloc(unsigned long size)
> > +{
> > +   return __vmalloc_node_range(size, 1, MODULES_VADDR,
> > +                               MODULES_END, GFP_KERNEL,
> > +                               PAGE_KERNEL, 0, NUMA_NO_NODE,
> > +                               __builtin_return_address(0));
> > +}
> > +
> > +void module_memfree(void *module_region)
> > +{
> > +   if (in_interrupt())
> > +           pr_warn("In interrupt context: vmalloc may not work.\n");
> > +
> > +   vfree(module_region);
> > +}
> > +#endif
> > diff --git a/kernel/kprobes.c b/kernel/kprobes.c
> > index 9d9095e81792..2c583ab6efc4 100644
> > --- a/kernel/kprobes.c
> > +++ b/kernel/kprobes.c
> > @@ -1580,6 +1580,7 @@ static int check_kprobe_address_safe(struct kprobe *p,
> >             goto out;
> >     }
> >  
> > +#ifdef CONFIG_MODULES
> >     /* Check if 'p' is probing a module. */
> >     *probed_mod = __module_text_address((unsigned long) p->addr);
> >     if (*probed_mod) {
> > @@ -1603,6 +1604,8 @@ static int check_kprobe_address_safe(struct kprobe *p,
> >                     ret = -ENOENT;
> >             }
> >     }
> > +#endif
> > +
> >  out:
> >     preempt_enable();
> >     jump_label_unlock();
> > @@ -2482,6 +2485,7 @@ int kprobe_add_area_blacklist(unsigned long start, 
> > unsigned long end)
> >     return 0;
> >  }
> >  
> > +#ifdef CONFIG_MODULES
> >  /* Remove all symbols in given area from kprobe blacklist */
> >  static void kprobe_remove_area_blacklist(unsigned long start, unsigned 
> > long end)
> >  {
> > @@ -2499,6 +2503,7 @@ static void kprobe_remove_ksym_blacklist(unsigned 
> > long entry)
> >  {
> >     kprobe_remove_area_blacklist(entry, entry + 1);
> >  }
> > +#endif /* CONFIG_MODULES */
> >  
> >  int __weak arch_kprobe_get_kallsym(unsigned int *symnum, unsigned long 
> > *value,
> >                                char *type, char *sym)
> > @@ -2564,6 +2569,7 @@ static int __init populate_kprobe_blacklist(unsigned 
> > long *start,
> >     return ret ? : arch_populate_kprobe_blacklist();
> >  }
> >  
> > +#ifdef CONFIG_MODULES
> >  static void add_module_kprobe_blacklist(struct module *mod)
> >  {
> >     unsigned long start, end;
> > @@ -2665,6 +2671,7 @@ static struct notifier_block kprobe_module_nb = {
> >     .notifier_call = kprobes_module_callback,
> >     .priority = 0
> >  };
> > +#endif /* CONFIG_MODULES */
> >  
> >  void kprobe_free_init_mem(void)
> >  {
> > @@ -2724,8 +2731,11 @@ static int __init init_kprobes(void)
> >     err = arch_init_kprobes();
> >     if (!err)
> >             err = register_die_notifier(&kprobe_exceptions_nb);
> > +
> > +#ifdef CONFIG_MODULES
> >     if (!err)
> >             err = register_module_notifier(&kprobe_module_nb);
> > +#endif
> >  
> >     kprobes_initialized = (err == 0);
> >     kprobe_sysctls_init();
> > diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
> > index c4c6e0e0068b..f8fbd5e76dda 100644
> > --- a/kernel/trace/trace_kprobe.c
> > +++ b/kernel/trace/trace_kprobe.c
> > @@ -111,6 +111,7 @@ static nokprobe_inline bool 
> > trace_kprobe_within_module(struct trace_kprobe *tk,
> >     return strncmp(module_name(mod), name, len) == 0 && name[len] == ':';
> >  }
> >  
> > +#ifdef CONFIG_MODULES
> >  static nokprobe_inline bool trace_kprobe_module_exist(struct trace_kprobe 
> > *tk)
> >  {
> >     char *p;
> > @@ -129,6 +130,7 @@ static nokprobe_inline bool 
> > trace_kprobe_module_exist(struct trace_kprobe *tk)
> >  
> >     return ret;
> >  }
> > +#endif /* CONFIG_MODULES */
> >  
> >  static bool trace_kprobe_is_busy(struct dyn_event *ev)
> >  {
> > @@ -608,7 +610,11 @@ static int append_trace_kprobe(struct trace_kprobe 
> > *tk, struct trace_kprobe *to)
> >  
> >     /* Register k*probe */
> >     ret = __register_trace_kprobe(tk);
> > -   if (ret == -ENOENT && !trace_kprobe_module_exist(tk)) {
> > +#ifdef CONFIG_MODULES
> > +   if (ret == -ENOENT && trace_kprobe_module_exist(tk))
> > +           ret = 0;
> > +#endif /* CONFIG_MODULES */
> > +   if (ret == -ENOENT) {
> >             pr_warn("This probe might be able to register after target 
> > module is loaded. Continue.\n");
> >             ret = 0;
> >     }
> > @@ -655,7 +661,11 @@ static int register_trace_kprobe(struct trace_kprobe 
> > *tk)
> >  
> >     /* Register k*probe */
> >     ret = __register_trace_kprobe(tk);
> > -   if (ret == -ENOENT && !trace_kprobe_module_exist(tk)) {
> > +#ifdef CONFIG_MODULES
> > +   if (ret == -ENOENT && trace_kprobe_module_exist(tk))
> > +           ret = 0;
> > +#endif /* CONFIG_MODULES */
> > +   if (ret == -ENOENT) {
> >             pr_warn("This probe might be able to register after target 
> > module is loaded. Continue.\n");
> >             ret = 0;
> >     }
> > @@ -670,6 +680,7 @@ static int register_trace_kprobe(struct trace_kprobe 
> > *tk)
> >     return ret;
> >  }
> >  
> > +#ifdef CONFIG_MODULES
> >  /* Module notifier call back, checking event on the module */
> >  static int trace_kprobe_module_callback(struct notifier_block *nb,
> >                                    unsigned long val, void *data)
> > @@ -704,6 +715,7 @@ static struct notifier_block trace_kprobe_module_nb = {
> >     .notifier_call = trace_kprobe_module_callback,
> >     .priority = 1   /* Invoked after kprobe module callback */
> >  };
> > +#endif /* CONFIG_MODULES */
> >  
> >  static int count_symbols(void *data, unsigned long unused)
> >  {
> > @@ -1897,8 +1909,10 @@ static __init int init_kprobe_trace_early(void)
> >     if (ret)
> >             return ret;
> >  
> > +#ifdef CONFIG_MODULES
> >     if (register_module_notifier(&trace_kprobe_module_nb))
> >             return -EINVAL;
> > +#endif /* CONFIG_MODULES */
> >  
> >     return 0;
> >  }
> > -- 
> > 2.44.0
> > 
> 
> 
> -- 
> Masami Hiramatsu (Google) <mhira...@kernel.org>

Reply via email to