Tracing with kprobes while running a monolithic kernel is currently
impossible due to the kernel module allocator dependency.

Address the issue by allowing architectures to implement module_alloc()
and module_memfree() independent of the module subsystem. An arch tree
can signal this by setting HAVE_KPROBES_ALLOC in its Kconfig file.

Realize the feature on RISC-V by separating the allocator into module_alloc.c
and implementing module_memfree().

Link: https://www.sochub.fi # for power-on testing new SoCs with a minimal 
stack
Link: https://lore.kernel.org/all/20220608000014.3054333-1-jar...@profian.com/ 
# continuation
Signed-off-by: Jarkko Sakkinen <jar...@kernel.org>
---
v2:
- Better late than never right? :-)
- Focus only on RISC-V for now to make the patch more digestible. This
  is the arch where I use the patch on a daily basis to help with QA.
- Introduce HAVE_KPROBES_ALLOC flag to help with more gradual migration.
---
 arch/Kconfig                     |  8 +++++++-
 arch/riscv/Kconfig               |  1 +
 arch/riscv/kernel/Makefile       |  5 +++++
 arch/riscv/kernel/module.c       | 11 -----------
 arch/riscv/kernel/module_alloc.c | 28 ++++++++++++++++++++++++++++
 kernel/kprobes.c                 | 10 ++++++++++
 kernel/trace/trace_kprobe.c      | 18 ++++++++++++++++--
 7 files changed, 67 insertions(+), 14 deletions(-)
 create mode 100644 arch/riscv/kernel/module_alloc.c

diff --git a/arch/Kconfig b/arch/Kconfig
index a5af0edd3eb8..c931f1de98a7 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -52,7 +52,7 @@ config GENERIC_ENTRY
 
 config KPROBES
        bool "Kprobes"
-       depends on MODULES
+       depends on MODULES || HAVE_KPROBES_ALLOC
        depends on HAVE_KPROBES
        select KALLSYMS
        select TASKS_RCU if PREEMPTION
@@ -215,6 +215,12 @@ config HAVE_OPTPROBES
 config HAVE_KPROBES_ON_FTRACE
        bool
 
+config HAVE_KPROBES_ALLOC
+       bool
+       help
+         Architectures that select this option are capable of allocating memory
+         for kprobes without the kernel module allocator.
+
 config ARCH_CORRECT_STACKTRACE_ON_KRETPROBE
        bool
        help
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index e3142ce531a0..4f1b925e83d8 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -132,6 +132,7 @@ config RISCV
        select HAVE_KPROBES if !XIP_KERNEL
        select HAVE_KPROBES_ON_FTRACE if !XIP_KERNEL
        select HAVE_KRETPROBES if !XIP_KERNEL
+       select HAVE_KPROBES_ALLOC if !XIP_KERNEL
        # https://github.com/ClangBuiltLinux/linux/issues/1881
        select HAVE_LD_DEAD_CODE_DATA_ELIMINATION if !LD_IS_LLD
        select HAVE_MOVE_PMD
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index 604d6bf7e476..46318194bce1 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -73,6 +73,11 @@ obj-$(CONFIG_SMP)            += cpu_ops.o
 
 obj-$(CONFIG_RISCV_BOOT_SPINWAIT) += cpu_ops_spinwait.o
 obj-$(CONFIG_MODULES)          += module.o
+ifeq ($(CONFIG_MODULES),y)
+obj-y                          += module_alloc.o
+else
+obj-$(CONFIG_KPROBES)          += module_alloc.o
+endif
 obj-$(CONFIG_MODULE_SECTIONS)  += module-sections.o
 
 obj-$(CONFIG_CPU_PM)           += suspend_entry.o suspend.o
diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c
index 5e5a82644451..cc324b450f2e 100644
--- a/arch/riscv/kernel/module.c
+++ b/arch/riscv/kernel/module.c
@@ -905,17 +905,6 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char 
*strtab,
        return 0;
 }
 
-#if defined(CONFIG_MMU) && defined(CONFIG_64BIT)
-void *module_alloc(unsigned long size)
-{
-       return __vmalloc_node_range(size, 1, MODULES_VADDR,
-                                   MODULES_END, GFP_KERNEL,
-                                   PAGE_KERNEL, VM_FLUSH_RESET_PERMS,
-                                   NUMA_NO_NODE,
-                                   __builtin_return_address(0));
-}
-#endif
-
 int module_finalize(const Elf_Ehdr *hdr,
                    const Elf_Shdr *sechdrs,
                    struct module *me)
diff --git a/arch/riscv/kernel/module_alloc.c b/arch/riscv/kernel/module_alloc.c
new file mode 100644
index 000000000000..3d9aa8dbca8a
--- /dev/null
+++ b/arch/riscv/kernel/module_alloc.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ *  Copyright (c) 2017 Zihao Yu
+ *  Copyright (c) 2024 Jarkko Sakkinen
+ */
+
+#include <linux/mm.h>
+#include <linux/moduleloader.h>
+#include <linux/vmalloc.h>
+#include <asm/sections.h>
+
+#if defined(CONFIG_MMU) && defined(CONFIG_64BIT)
+/*
+ * Allocate @size bytes of vmalloc memory between MODULES_VADDR and
+ * MODULES_END.
+ *
+ * VM_FLUSH_RESET_PERMS is kept from the implementation this replaces in
+ * arch/riscv/kernel/module.c, so that moving the function does not change
+ * the flags the region is allocated with.
+ *
+ * Returns whatever __vmalloc_node_range() returns.
+ */
+void *module_alloc(unsigned long size)
+{
+       return __vmalloc_node_range(size, 1, MODULES_VADDR,
+                                   MODULES_END, GFP_KERNEL,
+                                   PAGE_KERNEL, VM_FLUSH_RESET_PERMS,
+                                   NUMA_NO_NODE,
+                                   __builtin_return_address(0));
+}
+
+/*
+ * Release a region with vfree(). A warning is printed first when called
+ * from interrupt context; the region is freed regardless.
+ */
+void module_memfree(void *module_region)
+{
+       if (in_interrupt())
+               pr_warn("In interrupt context: vmalloc may not work.\n");
+
+       vfree(module_region);
+}
+#endif /* CONFIG_MMU && CONFIG_64BIT */
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 9d9095e81792..2c583ab6efc4 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1580,6 +1580,7 @@ static int check_kprobe_address_safe(struct kprobe *p,
                goto out;
        }
 
+#ifdef CONFIG_MODULES
        /* Check if 'p' is probing a module. */
        *probed_mod = __module_text_address((unsigned long) p->addr);
        if (*probed_mod) {
@@ -1603,6 +1604,8 @@ static int check_kprobe_address_safe(struct kprobe *p,
                        ret = -ENOENT;
                }
        }
+#endif
+
 out:
        preempt_enable();
        jump_label_unlock();
@@ -2482,6 +2485,7 @@ int kprobe_add_area_blacklist(unsigned long start, 
unsigned long end)
        return 0;
 }
 
+#ifdef CONFIG_MODULES
 /* Remove all symbols in given area from kprobe blacklist */
 static void kprobe_remove_area_blacklist(unsigned long start, unsigned long 
end)
 {
@@ -2499,6 +2503,7 @@ static void kprobe_remove_ksym_blacklist(unsigned long 
entry)
 {
        kprobe_remove_area_blacklist(entry, entry + 1);
 }
+#endif /* CONFIG_MODULES */
 
 int __weak arch_kprobe_get_kallsym(unsigned int *symnum, unsigned long *value,
                                   char *type, char *sym)
@@ -2564,6 +2569,7 @@ static int __init populate_kprobe_blacklist(unsigned long 
*start,
        return ret ? : arch_populate_kprobe_blacklist();
 }
 
+#ifdef CONFIG_MODULES
 static void add_module_kprobe_blacklist(struct module *mod)
 {
        unsigned long start, end;
@@ -2665,6 +2671,7 @@ static struct notifier_block kprobe_module_nb = {
        .notifier_call = kprobes_module_callback,
        .priority = 0
 };
+#endif /* CONFIG_MODULES */
 
 void kprobe_free_init_mem(void)
 {
@@ -2724,8 +2731,11 @@ static int __init init_kprobes(void)
        err = arch_init_kprobes();
        if (!err)
                err = register_die_notifier(&kprobe_exceptions_nb);
+
+#ifdef CONFIG_MODULES
        if (!err)
                err = register_module_notifier(&kprobe_module_nb);
+#endif
 
        kprobes_initialized = (err == 0);
        kprobe_sysctls_init();
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index c4c6e0e0068b..f8fbd5e76dda 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -111,6 +111,7 @@ static nokprobe_inline bool 
trace_kprobe_within_module(struct trace_kprobe *tk,
        return strncmp(module_name(mod), name, len) == 0 && name[len] == ':';
 }
 
+#ifdef CONFIG_MODULES
 static nokprobe_inline bool trace_kprobe_module_exist(struct trace_kprobe *tk)
 {
        char *p;
@@ -129,6 +130,7 @@ static nokprobe_inline bool 
trace_kprobe_module_exist(struct trace_kprobe *tk)
 
        return ret;
 }
+#endif /* CONFIG_MODULES */
 
 static bool trace_kprobe_is_busy(struct dyn_event *ev)
 {
@@ -608,7 +610,11 @@ static int append_trace_kprobe(struct trace_kprobe *tk, 
struct trace_kprobe *to)
 
        /* Register k*probe */
        ret = __register_trace_kprobe(tk);
-       if (ret == -ENOENT && !trace_kprobe_module_exist(tk)) {
+#ifdef CONFIG_MODULES
+       if (ret == -ENOENT && !trace_kprobe_module_exist(tk)) {
+#else
+       if (ret == -ENOENT) {
+#endif /* CONFIG_MODULES */
                pr_warn("This probe might be able to register after target 
module is loaded. Continue.\n");
                ret = 0;
        }
@@ -655,7 +661,11 @@ static int register_trace_kprobe(struct trace_kprobe *tk)
 
        /* Register k*probe */
        ret = __register_trace_kprobe(tk);
-       if (ret == -ENOENT && !trace_kprobe_module_exist(tk)) {
+#ifdef CONFIG_MODULES
+       if (ret == -ENOENT && !trace_kprobe_module_exist(tk)) {
+#else
+       if (ret == -ENOENT) {
+#endif /* CONFIG_MODULES */
                pr_warn("This probe might be able to register after target 
module is loaded. Continue.\n");
                ret = 0;
        }
@@ -670,6 +680,7 @@ static int register_trace_kprobe(struct trace_kprobe *tk)
        return ret;
 }
 
+#ifdef CONFIG_MODULES
 /* Module notifier call back, checking event on the module */
 static int trace_kprobe_module_callback(struct notifier_block *nb,
                                       unsigned long val, void *data)
@@ -704,6 +715,7 @@ static struct notifier_block trace_kprobe_module_nb = {
        .notifier_call = trace_kprobe_module_callback,
        .priority = 1   /* Invoked after kprobe module callback */
 };
+#endif /* CONFIG_MODULES */
 
 static int count_symbols(void *data, unsigned long unused)
 {
@@ -1897,8 +1909,10 @@ static __init int init_kprobe_trace_early(void)
        if (ret)
                return ret;
 
+#ifdef CONFIG_MODULES
        if (register_module_notifier(&trace_kprobe_module_nb))
                return -EINVAL;
+#endif /* CONFIG_MODULES */
 
        return 0;
 }
-- 
2.44.0


Reply via email to