Re: [PATCH v3] mm/mm_init: use node's number of cpus in deferred_page_init_max_threads

2024-05-28 Thread Mike Rapoport
On Tue, May 28, 2024 at 02:54:58PM -0400, Eric Chanudet wrote:
> When DEFERRED_STRUCT_PAGE_INIT=y, use a node's cpu count as maximum
> thread count for the deferred initialization of struct pages via padata.
> This should result in shorter boot times for these configurations by
> going through page_alloc_init_late() faster as systems tend not to be
> under heavy load that early in the bootstrap.
> 
> Only x86_64 does that now. Make it archs agnostic when
> DEFERRED_STRUCT_PAGE_INIT is set. With the default defconfigs, that
> includes powerpc and s390.
> 
> It used to be so before offering archs to override the function for
> tuning with commit ecd096506922 ("mm: make deferred init's max threads
> arch-specific").
> 
> Setting DEFERRED_STRUCT_PAGE_INIT and testing on a few arm64 platforms
> shows faster deferred_init_memmap completions:
> | | x13s| SA8775p-ride | Ampere R137-P31 | Ampere HR330 |
> | | Metal, 32GB | VM, 36GB | VM, 58GB| Metal, 128GB |
> | | 8cpus   | 8cpus| 8cpus   | 32cpus   |
> |-|-|--|-|--|
> | threads |  ms (%) | ms   (%) |  ms (%) |  ms  (%) |
> |-|-|--|-|--|
> | 1   | 108(0%) | 72  (0%) | 224(0%) | 324 (0%) |
> | cpus|  24  (-77%) | 36(-50%) |  40  (-82%) |  56   (-82%) |
> 
> Michael Ellerman on a powerpc machine (1TB, 40 cores, 4KB pages) reports
> faster deferred_init_memmap from 210-240ms to 90-110ms between nodes.
> 
> Signed-off-by: Eric Chanudet 
> Tested-by: Michael Ellerman  (powerpc)

Acked-by: Mike Rapoport (IBM) 

> ---
> - v1: 
> https://lore.kernel.org/linux-arm-kernel/20240520231555.395979-5-echan...@redhat.com
> - Changes since v1:
>  - Make the generic function return the number of cpus of the node as
>max threads limit instead of overriding it for arm64.
>  - Drop Baoquan He's R-b on v1 since the logic changed.
>  - Add CCs according to patch changes (ppc and s390 set
>DEFERRED_STRUCT_PAGE_INIT by default).
> 
> - v2: 
> https://lore.kernel.org/linux-arm-kernel/20240522203758.626932-4-echan...@redhat.com/
> - Changes since v2:
>  - deferred_page_init_max_threads returns unsigned and use max instead
>of max_t.
>  - Make deferred_page_init_max_threads static since there are no more
>overrides.
>  - Rephrase description.
>  - Add T-b and report from Michael Ellerman.
> 
>  arch/x86/mm/init_64.c| 12 
>  include/linux/memblock.h |  2 --
>  mm/mm_init.c |  5 ++---
>  3 files changed, 2 insertions(+), 17 deletions(-)
> 
> diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
> index 7e177856ee4f..adec42928ec1 100644
> --- a/arch/x86/mm/init_64.c
> +++ b/arch/x86/mm/init_64.c
> @@ -1354,18 +1354,6 @@ void __init mem_init(void)
>   preallocate_vmalloc_pages();
>  }
>  
> -#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
> -int __init deferred_page_init_max_threads(const struct cpumask *node_cpumask)
> -{
> - /*
> -  * More CPUs always led to greater speedups on tested systems, up to
> -  * all the nodes' CPUs.  Use all since the system is otherwise idle
> -  * now.
> -  */
> - return max_t(int, cpumask_weight(node_cpumask), 1);
> -}
> -#endif
> -
>  int kernel_set_to_readonly;
>  
>  void mark_rodata_ro(void)
> diff --git a/include/linux/memblock.h b/include/linux/memblock.h
> index e2082240586d..40c62aca36ec 100644
> --- a/include/linux/memblock.h
> +++ b/include/linux/memblock.h
> @@ -335,8 +335,6 @@ void __next_mem_pfn_range_in_zone(u64 *idx, struct zone 
> *zone,
>   for (; i != U64_MAX;  \
>__next_mem_pfn_range_in_zone(&i, zone, p_start, p_end))
>  
> -int __init deferred_page_init_max_threads(const struct cpumask 
> *node_cpumask);
> -
>  #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
>  
>  /**
> diff --git a/mm/mm_init.c b/mm/mm_init.c
> index f72b852bd5b8..acfeba508796 100644
> --- a/mm/mm_init.c
> +++ b/mm/mm_init.c
> @@ -2122,11 +2122,10 @@ deferred_init_memmap_chunk(unsigned long start_pfn, 
> unsigned long end_pfn,
>   }
>  }
>  
> -/* An arch may override for more concurrency. */
> -__weak int __init
> +static unsigned int __init
>  deferred_page_init_max_threads(const struct cpumask *node_cpumask)
>  {
> - return 1;
> + return max(cpumask_weight(node_cpumask), 1U);
>  }
>  
>  /* Initialise remaining memory on a node */
> -- 
> 2.44.0
> 

-- 
Sincerely yours,
Mike.


Re: [Patch v2] mm/memblock: discard .text/.data if CONFIG_ARCH_KEEP_MEMBLOCK not set

2024-05-24 Thread Mike Rapoport
On Fri, May 24, 2024 at 01:46:56AM +, Wei Yang wrote:
> On Tue, May 21, 2024 at 10:21:52AM +0300, Mike Rapoport wrote:
> >Hi,
> >
> >On Fri, May 10, 2024 at 02:04:22AM +, Wei Yang wrote:
> >> When CONFIG_ARCH_KEEP_MEMBLOCK is not set, we expect to discard related
> >> code and data. But that does not happen unless CONFIG_MEMORY_HOTPLUG is
> >> not set either.
> >> 
> >> This patch puts memblock's .text/.data into its own section, so that it
> >> only depends on CONFIG_ARCH_KEEP_MEMBLOCK to discard related code and
> >> data.
> >> 
> >> After this, from the log message in mem_init_print_info(), init size
> >> increase from 2420K to 2432K on arch x86.
> >> 
> >> Signed-off-by: Wei Yang 
> >> 
> >> ---
> >> v2: fix orphan section for powerpc
> >> ---
> >>  arch/powerpc/kernel/vmlinux.lds.S |  1 +
> >>  include/asm-generic/vmlinux.lds.h | 14 +-
> >>  include/linux/memblock.h  |  8 
> >>  3 files changed, 18 insertions(+), 5 deletions(-)
> >>  
> >> +#define __init_memblock __section(".mbinit.text") __cold notrace \
> >> +__latent_entropy
> >> +#define __initdata_memblock __section(".mbinit.data")
> >> +
> >
> >The new .mbinit.* sections should be added to scripts/mod/modpost.c
> >alongside .meminit.* sections and then I expect modpost to report a bunch
> >of section mismatches because many memblock functions are called on memory
> >hotplug even on architectures that don't select ARCH_KEEP_MEMBLOCK.
> >
> 
> I tried to add some code in modpost.c, "make all" looks good.
> 
> May I ask how can I trigger the "mismatch" warning?
> 
> BTW, if ARCH_KEEP_MEMBLOCK unset, we would discard memblock meta-data. If
> hotplug would call memblock function, it would be dangerous?
> 
> The additional code I used is like below.
> 
> ---
>  scripts/mod/modpost.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
> index 937294ff164f..c837e2882904 100644
> --- a/scripts/mod/modpost.c
> +++ b/scripts/mod/modpost.c
> @@ -777,14 +777,14 @@ static void check_section(const char *modname, struct 
> elf_info *elf,
>  
>  #define ALL_INIT_DATA_SECTIONS \
>   ".init.setup", ".init.rodata", ".meminit.rodata", \
> - ".init.data", ".meminit.data"
> + ".init.data", ".meminit.data", "mbinit.data"

should be ".mbinit.data"
>  
>  #define ALL_PCI_INIT_SECTIONS\
>   ".pci_fixup_early", ".pci_fixup_header", ".pci_fixup_final", \
>   ".pci_fixup_enable", ".pci_fixup_resume", \
>   ".pci_fixup_resume_early", ".pci_fixup_suspend"
>  
> -#define ALL_XXXINIT_SECTIONS ".meminit.*"
> +#define ALL_XXXINIT_SECTIONS ".meminit.*", "mbinit.*"

and ".mbinit.*"

But regardless of typos, when ARCH_KEEP_MEMBLOCK=n the .mbinit is equivalent
to .init and it should not be referenced from .meminit, so I don't think
adding it here is correct.

If I simply alias __init_memblock to __init then with
CONFIG_MEMORY_HOTPLUG=y I get

WARNING: modpost: vmlinux: section mismatch in reference: early_pfn_to_nid+0x42 
(section: .meminit.text) -> memblock_search_pfn_nid (section: .init.text)
WARNING: modpost: vmlinux: section mismatch in reference: 
memmap_init_range+0x142 (section: .meminit.text) -> mirrored_kernelcore 
(section: .init.data)
WARNING: modpost: vmlinux: section mismatch in reference: 
memmap_init_range+0x1e1 (section: .meminit.text) -> memblock (section: 
.init.data)
WARNING: modpost: vmlinux: section mismatch in reference: 
memmap_init_range+0x1e8 (section: .meminit.text) -> memblock (section: 
.init.data)
WARNING: modpost: vmlinux: section mismatch in reference: 
sparse_buffer_alloc+0x3b (section: .meminit.text) -> memblock_free (section: 
.init.text)

>  #define ALL_INIT_SECTIONS INIT_SECTIONS, ALL_XXXINIT_SECTIONS
>  #define ALL_EXIT_SECTIONS ".exit.*"
> @@ -799,7 +799,7 @@ static void check_section(const char *modname, struct 
> elf_info *elf,
>  
>  #define INIT_SECTIONS  ".init.*"
>  
> -#define ALL_TEXT_SECTIONS  ".init.text", ".meminit.text", ".exit.text", \
> +#define ALL_TEXT_SECTIONS  ".init.text", ".meminit.text", ".mbinit.text", 
> ".exit.text", \
>   TEXT_SECTIONS, OTHER_TEXT_SECTIONS
>  
>  enum mismatch {
> 
> -- 
> Wei Yang
> Help you, Help me
> 

-- 
Sincerely yours,
Mike.


Re: [PATCH v2] mm/mm_init: use node's number of cpus in deferred_page_init_max_threads

2024-05-23 Thread Mike Rapoport
On Wed, May 22, 2024 at 04:38:01PM -0400, Eric Chanudet wrote:
> x86_64 is already using the node's cpu as maximum threads. Make that the
> default for all archs setting DEFERRED_STRUCT_PAGE_INIT.
> 
> This returns to the behavior prior to making the function arch-specific
> with commit ecd096506922 ("mm: make deferred init's max threads
> arch-specific").
> 
> Signed-off-by: Eric Chanudet 
> 
> ---
> Setting DEFERRED_STRUCT_PAGE_INIT and testing on a few arm64 platforms
> shows faster deferred_init_memmap completions:
> 
> | | x13s| SA8775p-ride | Ampere R137-P31 | Ampere HR330 |
> | | Metal, 32GB | VM, 36GB | VM, 58GB| Metal, 128GB |
> | | 8cpus   | 8cpus| 8cpus   | 32cpus   |
> |-|-|--|-|--|
> | threads |  ms (%) | ms   (%) |  ms (%) |  ms  (%) |
> |-|-|--|-|--|
> | 1   | 108(0%) | 72  (0%) | 224(0%) | 324 (0%) |
> | cpus|  24  (-77%) | 36(-50%) |  40  (-82%) |  56   (-82%) |
> 
> - v1: 
> https://lore.kernel.org/linux-arm-kernel/20240520231555.395979-5-echan...@redhat.com
> - Changes since v1:
>  - Make the generic function return the number of cpus of the node as
>max threads limit instead of overriding it for arm64.
> - Drop Baoquan He's R-b on v1 since the logic changed.
> - Add CCs according to patch changes (ppc and s390 set
>   DEFERRED_STRUCT_PAGE_INIT by default).
> 
>  arch/x86/mm/init_64.c | 12 
>  mm/mm_init.c  |  2 +-
>  2 files changed, 1 insertion(+), 13 deletions(-)
> 
> diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
> index 7e177856ee4f..adec42928ec1 100644
> --- a/arch/x86/mm/init_64.c
> +++ b/arch/x86/mm/init_64.c
> @@ -1354,18 +1354,6 @@ void __init mem_init(void)
>   preallocate_vmalloc_pages();
>  }
>  
> -#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
> -int __init deferred_page_init_max_threads(const struct cpumask *node_cpumask)
> -{
> - /*
> -  * More CPUs always led to greater speedups on tested systems, up to
> -  * all the nodes' CPUs.  Use all since the system is otherwise idle
> -  * now.
> -  */
> - return max_t(int, cpumask_weight(node_cpumask), 1);
> -}
> -#endif
> -
>  int kernel_set_to_readonly;
>  
>  void mark_rodata_ro(void)
> diff --git a/mm/mm_init.c b/mm/mm_init.c
> index f72b852bd5b8..e0023aa68555 100644
> --- a/mm/mm_init.c
> +++ b/mm/mm_init.c
> @@ -2126,7 +2126,7 @@ deferred_init_memmap_chunk(unsigned long start_pfn, 
> unsigned long end_pfn,
>  __weak int __init

If s390 folks confirm there's no regression for them I think we can make
this static.

>  deferred_page_init_max_threads(const struct cpumask *node_cpumask)
>  {
> - return 1;
> + return max_t(int, cpumask_weight(node_cpumask), 1);
>  }
>  
>  /* Initialise remaining memory on a node */
> -- 
> 2.44.0
> 

-- 
Sincerely yours,
Mike.


Re: [Patch v2] mm/memblock: discard .text/.data if CONFIG_ARCH_KEEP_MEMBLOCK not set

2024-05-21 Thread Mike Rapoport
Hi,

On Fri, May 10, 2024 at 02:04:22AM +, Wei Yang wrote:
> When CONFIG_ARCH_KEEP_MEMBLOCK is not set, we expect to discard related
> code and data. But that does not happen unless CONFIG_MEMORY_HOTPLUG is
> not set either.
> 
> This patch puts memblock's .text/.data into its own section, so that it
> only depends on CONFIG_ARCH_KEEP_MEMBLOCK to discard related code and
> data.
> 
> After this, from the log message in mem_init_print_info(), init size
> increase from 2420K to 2432K on arch x86.
> 
> Signed-off-by: Wei Yang 
> 
> ---
> v2: fix orphan section for powerpc
> ---
>  arch/powerpc/kernel/vmlinux.lds.S |  1 +
>  include/asm-generic/vmlinux.lds.h | 14 +-
>  include/linux/memblock.h  |  8 
>  3 files changed, 18 insertions(+), 5 deletions(-)
>  
> +#define __init_memblock __section(".mbinit.text") __cold notrace \
> +   __latent_entropy
> +#define __initdata_memblock __section(".mbinit.data")
> +

The new .mbinit.* sections should be added to scripts/mod/modpost.c
alongside .meminit.* sections and then I expect modpost to report a bunch
of section mismatches because many memblock functions are called on memory
hotplug even on architectures that don't select ARCH_KEEP_MEMBLOCK.

>  #ifndef CONFIG_ARCH_KEEP_MEMBLOCK
> -#define __init_memblock __meminit
> -#define __initdata_memblock __meminitdata
>  void memblock_discard(void);
>  #else
> -#define __init_memblock
> -#define __initdata_memblock
>  static inline void memblock_discard(void) {}
>  #endif
>  
> -- 
> 2.34.1
> 
> 

-- 
Sincerely yours,
Mike.


Re: [PATCH v8 00/17] mm: jit/text allocator

2024-05-05 Thread Mike Rapoport
This is embarrassing, but these patches were from a wrong branch :(
Please ignore.

On Sun, May 05, 2024 at 05:25:43PM +0300, Mike Rapoport wrote:
> From: "Mike Rapoport (IBM)" 
> 
> Hi,
> 
> The patches are also available in git:
> https://git.kernel.org/pub/scm/linux/kernel/git/rppt/linux.git/log/?h=execmem/v8
> 
> v8:
> * fix initialization of default_execmem_info
> 
> v7: https://lore.kernel.org/all/20240429121620.1186447-1-r...@kernel.org
> * define MODULE_{VADDR,END} for riscv32 to fix the build and avoid
>   #ifdefs in a function body
> * add Acks, thanks everybody
> 
> v6: https://lore.kernel.org/all/20240426082854.7355-1-r...@kernel.org
> * restore patch "arm64: extend execmem_info for generated code
>   allocations" that disappeared in v5 rebase
> * update execmem initialization so that by default it will be
>   initialized early while late initialization will be an opt-in
> 
> v5: https://lore.kernel.org/all/20240422094436.3625171-1-r...@kernel.org
> * rebase on v6.9-rc4 to avoid a conflict in kprobes
> * add copyrights to mm/execmem.c (Luis)
> * fix spelling (Ingo)
> * define MODULES_VADDR for sparc (Sam)
> * consistently initialize struct execmem_info (Peter)
> * reduce #ifdefs in function bodies in kprobes (Masami) 
> 
> v4: https://lore.kernel.org/all/20240411160051.2093261-1-r...@kernel.org
> * rebase on v6.9-rc2
> * rename execmem_params to execmem_info and execmem_arch_params() to
>   execmem_arch_setup()
> * use single execmem_alloc() API instead of execmem_{text,data}_alloc() (Song)
> * avoid extra copy of execmem parameters (Rick)
> * run execmem_init() as core_initcall() except for the architectures that
>   may allocate text really early (currently only x86) (Will)
> * add acks for some of arm64 and riscv changes, thanks Will and Alexandre
> * new commits:
>   - drop call to kasan_alloc_module_shadow() on arm64 because it's not
> needed anymore
>   - rename MODULE_START to MODULES_VADDR on MIPS
>   - use CONFIG_EXECMEM instead of CONFIG_MODULES on powerpc as per Christophe:
> 
> https://lore.kernel.org/all/79062fa3-3402-47b3-8920-9231ad05e...@csgroup.eu/
> 
> v3: https://lore.kernel.org/all/20230918072955.2507221-1-r...@kernel.org
> * add type parameter to execmem allocation APIs
> * remove BPF dependency on modules
> 
> v2: https://lore.kernel.org/all/20230616085038.4121892-1-r...@kernel.org
> * Separate "module" and "others" allocations with execmem_text_alloc()
> and jit_text_alloc()
> * Drop ROX entailment on x86
> * Add ack for nios2 changes, thanks Dinh Nguyen
> 
> v1: https://lore.kernel.org/all/20230601101257.530867-1-r...@kernel.org
> 
> = Cover letter from v1 (slightly updated) =
> 
> module_alloc() is used everywhere as a means to allocate memory for code.
> 
> Besides being semantically wrong, this unnecessarily ties all subsystems
> that need to allocate code, such as ftrace, kprobes and BPF to modules and
> puts the burden of code allocation to the modules code.
> 
> Several architectures override module_alloc() because of various
> constraints where the executable memory can be located and this causes
> additional obstacles for improvements of code allocation.
> 
> A centralized infrastructure for code allocation allows allocations of
> executable memory as ROX, and future optimizations such as caching large
> pages for better iTLB performance and providing sub-page allocations for
> users that only need small jit code snippets.
> 
> Rick Edgecombe proposed perm_alloc extension to vmalloc [1] and Song Liu
> proposed execmem_alloc [2], but both these approaches were targeting BPF
> allocations and lacked the ground work to abstract executable allocations
> and split them from the modules core.
> 
> Thomas Gleixner suggested to express module allocation restrictions and
> requirements as struct mod_alloc_type_params [3] that would define ranges,
> protections and other parameters for different types of allocations used by
> modules and following that suggestion Song separated allocations of
> different types in modules (commit ac3b43283923 ("module: replace
> module_layout with module_memory")) and posted "Type aware module
> allocator" set [4].
> 
> I liked the idea of parametrising code allocation requirements as a
> structure, but I believe the original proposal and Song's module allocator
> was too module centric, so I came up with these patches.
> 
> This set splits code allocation from modules by introducing execmem_alloc()
> and execmem_free() APIs, replaces call sites of module_alloc() and
> module_memfree() with the new APIs and implements core text and related
> allocations in a central place.
> 
> 

[PATCH RESEND v8 16/16] bpf: remove CONFIG_BPF_JIT dependency on CONFIG_MODULES of

2024-05-05 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

BPF just-in-time compiler depended on CONFIG_MODULES because it used
module_alloc() to allocate memory for the generated code.

Since code allocations are now implemented with execmem, drop dependency of
CONFIG_BPF_JIT on CONFIG_MODULES and make it select CONFIG_EXECMEM.

Suggested-by: Björn Töpel 
Signed-off-by: Mike Rapoport (IBM) 
---
 kernel/bpf/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/bpf/Kconfig b/kernel/bpf/Kconfig
index bc25f5098a25..f999e4e0b344 100644
--- a/kernel/bpf/Kconfig
+++ b/kernel/bpf/Kconfig
@@ -43,7 +43,7 @@ config BPF_JIT
bool "Enable BPF Just In Time compiler"
depends on BPF
depends on HAVE_CBPF_JIT || HAVE_EBPF_JIT
-   depends on MODULES
+   select EXECMEM
help
  BPF programs are normally handled by a BPF interpreter. This option
  allows the kernel to generate native code when a program is loaded
-- 
2.43.0



[PATCH RESEND v8 15/16] kprobes: remove dependency on CONFIG_MODULES

2024-05-05 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

kprobes depended on CONFIG_MODULES because it has to allocate memory for
code.

Since code allocations are now implemented with execmem, kprobes can be
enabled in non-modular kernels.

Add #ifdef CONFIG_MODULES guards for the code dealing with kprobes inside
modules, make CONFIG_KPROBES select CONFIG_EXECMEM and drop the
dependency of CONFIG_KPROBES on CONFIG_MODULES.

Signed-off-by: Mike Rapoport (IBM) 
Acked-by: Masami Hiramatsu (Google) 
---
 arch/Kconfig|  2 +-
 include/linux/module.h  |  9 ++
 kernel/kprobes.c| 55 +++--
 kernel/trace/trace_kprobe.c | 20 +-
 4 files changed, 63 insertions(+), 23 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index 4fd0daa54e6c..caa459964f09 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -52,9 +52,9 @@ config GENERIC_ENTRY
 
 config KPROBES
bool "Kprobes"
-   depends on MODULES
depends on HAVE_KPROBES
select KALLSYMS
+   select EXECMEM
select TASKS_RCU if PREEMPTION
help
  Kprobes allows you to trap at almost any kernel address and
diff --git a/include/linux/module.h b/include/linux/module.h
index 1153b0d99a80..ffa1c603163c 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -605,6 +605,11 @@ static inline bool module_is_live(struct module *mod)
return mod->state != MODULE_STATE_GOING;
 }
 
+static inline bool module_is_coming(struct module *mod)
+{
+return mod->state == MODULE_STATE_COMING;
+}
+
 struct module *__module_text_address(unsigned long addr);
 struct module *__module_address(unsigned long addr);
 bool is_module_address(unsigned long addr);
@@ -857,6 +862,10 @@ void *dereference_module_function_descriptor(struct module 
*mod, void *ptr)
return ptr;
 }
 
+static inline bool module_is_coming(struct module *mod)
+{
+   return false;
+}
 #endif /* CONFIG_MODULES */
 
 #ifdef CONFIG_SYSFS
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index ddd7cdc16edf..ca2c6cbd42d2 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1588,7 +1588,7 @@ static int check_kprobe_address_safe(struct kprobe *p,
}
 
/* Get module refcount and reject __init functions for loaded modules. 
*/
-   if (*probed_mod) {
+   if (IS_ENABLED(CONFIG_MODULES) && *probed_mod) {
/*
 * We must hold a refcount of the probed module while updating
 * its code to prohibit unexpected unloading.
@@ -1603,12 +1603,13 @@ static int check_kprobe_address_safe(struct kprobe *p,
 * kprobes in there.
 */
if (within_module_init((unsigned long)p->addr, *probed_mod) &&
-   (*probed_mod)->state != MODULE_STATE_COMING) {
+   !module_is_coming(*probed_mod)) {
module_put(*probed_mod);
*probed_mod = NULL;
ret = -ENOENT;
}
}
+
 out:
preempt_enable();
jump_label_unlock();
@@ -2488,24 +2489,6 @@ int kprobe_add_area_blacklist(unsigned long start, 
unsigned long end)
return 0;
 }
 
-/* Remove all symbols in given area from kprobe blacklist */
-static void kprobe_remove_area_blacklist(unsigned long start, unsigned long 
end)
-{
-   struct kprobe_blacklist_entry *ent, *n;
-
-   list_for_each_entry_safe(ent, n, &kprobe_blacklist, list) {
-   if (ent->start_addr < start || ent->start_addr >= end)
-   continue;
-   list_del(&ent->list);
-   kfree(ent);
-   }
-}
-
-static void kprobe_remove_ksym_blacklist(unsigned long entry)
-{
-   kprobe_remove_area_blacklist(entry, entry + 1);
-}
-
 int __weak arch_kprobe_get_kallsym(unsigned int *symnum, unsigned long *value,
   char *type, char *sym)
 {
@@ -2570,6 +2553,25 @@ static int __init populate_kprobe_blacklist(unsigned 
long *start,
return ret ? : arch_populate_kprobe_blacklist();
 }
 
+#ifdef CONFIG_MODULES
+/* Remove all symbols in given area from kprobe blacklist */
+static void kprobe_remove_area_blacklist(unsigned long start, unsigned long 
end)
+{
+   struct kprobe_blacklist_entry *ent, *n;
+
+   list_for_each_entry_safe(ent, n, &kprobe_blacklist, list) {
+   if (ent->start_addr < start || ent->start_addr >= end)
+   continue;
+   list_del(&ent->list);
+   kfree(ent);
+   }
+}
+
+static void kprobe_remove_ksym_blacklist(unsigned long entry)
+{
+   kprobe_remove_area_blacklist(entry, entry + 1);
+}
+
 static void add_module_kprobe_blacklist(struct module *mod)
 {
unsigned long start, end;
@@ -2672,6 +2674,17 @@ static struct notifier_block kprobe_module_nb = {
.priority = 0
 };
 
+static int kprobe_register_module_notifier(void)

[PATCH RESEND v8 14/16] powerpc: use CONFIG_EXECMEM instead of CONFIG_MODULES where appropriate

2024-05-05 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

There are places where CONFIG_MODULES guards the code that depends on
memory allocation being done with module_alloc().

Replace CONFIG_MODULES with CONFIG_EXECMEM in such places.

Signed-off-by: Mike Rapoport (IBM) 
---
 arch/powerpc/Kconfig | 2 +-
 arch/powerpc/include/asm/kasan.h | 2 +-
 arch/powerpc/kernel/head_8xx.S   | 4 ++--
 arch/powerpc/kernel/head_book3s_32.S | 6 +++---
 arch/powerpc/lib/code-patching.c | 2 +-
 arch/powerpc/mm/book3s32/mmu.c   | 2 +-
 6 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 1c4be3373686..2e586733a464 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -285,7 +285,7 @@ config PPC
select IOMMU_HELPER if PPC64
select IRQ_DOMAIN
select IRQ_FORCED_THREADING
-   select KASAN_VMALLOCif KASAN && MODULES
+   select KASAN_VMALLOCif KASAN && EXECMEM
select LOCK_MM_AND_FIND_VMA
select MMU_GATHER_PAGE_SIZE
select MMU_GATHER_RCU_TABLE_FREE
diff --git a/arch/powerpc/include/asm/kasan.h b/arch/powerpc/include/asm/kasan.h
index 365d2720097c..b5bbb94c51f6 100644
--- a/arch/powerpc/include/asm/kasan.h
+++ b/arch/powerpc/include/asm/kasan.h
@@ -19,7 +19,7 @@
 
 #define KASAN_SHADOW_SCALE_SHIFT   3
 
-#if defined(CONFIG_MODULES) && defined(CONFIG_PPC32)
+#if defined(CONFIG_EXECMEM) && defined(CONFIG_PPC32)
 #define KASAN_KERN_START   ALIGN_DOWN(PAGE_OFFSET - SZ_256M, SZ_256M)
 #else
 #define KASAN_KERN_START   PAGE_OFFSET
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 647b0b445e89..edc479a7c2bc 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -199,12 +199,12 @@ instruction_counter:
mfspr   r10, SPRN_SRR0  /* Get effective address of fault */
INVALIDATE_ADJACENT_PAGES_CPU15(r10, r11)
mtspr   SPRN_MD_EPN, r10
-#ifdef CONFIG_MODULES
+#ifdef CONFIG_EXECMEM
mfcrr11
compare_to_kernel_boundary r10, r10
 #endif
mfspr   r10, SPRN_M_TWB /* Get level 1 table */
-#ifdef CONFIG_MODULES
+#ifdef CONFIG_EXECMEM
blt+3f
rlwinm  r10, r10, 0, 20, 31
orisr10, r10, (swapper_pg_dir - PAGE_OFFSET)@ha
diff --git a/arch/powerpc/kernel/head_book3s_32.S 
b/arch/powerpc/kernel/head_book3s_32.S
index c1d89764dd22..57196883a00e 100644
--- a/arch/powerpc/kernel/head_book3s_32.S
+++ b/arch/powerpc/kernel/head_book3s_32.S
@@ -419,14 +419,14 @@ InstructionTLBMiss:
  */
/* Get PTE (linux-style) and check access */
mfspr   r3,SPRN_IMISS
-#ifdef CONFIG_MODULES
+#ifdef CONFIG_EXECMEM
lis r1, TASK_SIZE@h /* check if kernel address */
cmplw   0,r1,r3
 #endif
mfspr   r2, SPRN_SDR1
li  r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC
rlwinm  r2, r2, 28, 0xf000
-#ifdef CONFIG_MODULES
+#ifdef CONFIG_EXECMEM
li  r0, 3
bgt-112f
lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha   /* if kernel address, 
use */
@@ -442,7 +442,7 @@ InstructionTLBMiss:
andc.   r1,r1,r2/* check access & ~permission */
bne-InstructionAddressInvalid /* return if access not permitted */
/* Convert linux-style PTE to low word of PPC-style PTE */
-#ifdef CONFIG_MODULES
+#ifdef CONFIG_EXECMEM
rlwimi  r2, r0, 0, 31, 31   /* userspace ? -> PP lsb */
 #endif
ori r1, r1, 0xe06   /* clear out reserved bits */
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index c6ab46156cda..7af791446ddf 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -225,7 +225,7 @@ void __init poking_init(void)
 
 static unsigned long get_patch_pfn(void *addr)
 {
-   if (IS_ENABLED(CONFIG_MODULES) && is_vmalloc_or_module_addr(addr))
+   if (IS_ENABLED(CONFIG_EXECMEM) && is_vmalloc_or_module_addr(addr))
return vmalloc_to_pfn(addr);
else
return __pa_symbol(addr) >> PAGE_SHIFT;
diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c
index 100f999871bc..625fe7d08e06 100644
--- a/arch/powerpc/mm/book3s32/mmu.c
+++ b/arch/powerpc/mm/book3s32/mmu.c
@@ -184,7 +184,7 @@ unsigned long __init mmu_mapin_ram(unsigned long base, 
unsigned long top)
 
 static bool is_module_segment(unsigned long addr)
 {
-   if (!IS_ENABLED(CONFIG_MODULES))
+   if (!IS_ENABLED(CONFIG_EXECMEM))
return false;
if (addr < ALIGN_DOWN(MODULES_VADDR, SZ_256M))
return false;
-- 
2.43.0



[PATCH RESEND v8 13/16] x86/ftrace: enable dynamic ftrace without CONFIG_MODULES

2024-05-05 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

Dynamic ftrace must allocate memory for code and this was impossible
without CONFIG_MODULES.

With execmem separated from the modules code, execmem_alloc() is
available regardless of CONFIG_MODULES.

Remove dependency of dynamic ftrace on CONFIG_MODULES and make
CONFIG_DYNAMIC_FTRACE select CONFIG_EXECMEM in Kconfig.

Signed-off-by: Mike Rapoport (IBM) 
---
 arch/x86/Kconfig |  1 +
 arch/x86/kernel/ftrace.c | 10 --
 2 files changed, 1 insertion(+), 10 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 4474bf32d0a4..f2917ccf4fb4 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -34,6 +34,7 @@ config X86_64
select SWIOTLB
select ARCH_HAS_ELFCORE_COMPAT
select ZONE_DMA32
+   select EXECMEM if DYNAMIC_FTRACE
 
 config FORCE_DYNAMIC_FTRACE
def_bool y
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index c8ddb7abda7c..8da0e66ca22d 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -261,8 +261,6 @@ void arch_ftrace_update_code(int command)
 /* Currently only x86_64 supports dynamic trampolines */
 #ifdef CONFIG_X86_64
 
-#ifdef CONFIG_MODULES
-/* Module allocation simplifies allocating memory for code */
 static inline void *alloc_tramp(unsigned long size)
 {
return execmem_alloc(EXECMEM_FTRACE, size);
@@ -271,14 +269,6 @@ static inline void tramp_free(void *tramp)
 {
execmem_free(tramp);
 }
-#else
-/* Trampolines can only be created if modules are supported */
-static inline void *alloc_tramp(unsigned long size)
-{
-   return NULL;
-}
-static inline void tramp_free(void *tramp) { }
-#endif
 
 /* Defined as markers to the end of the ftrace default trampolines */
 extern void ftrace_regs_caller_end(void);
-- 
2.43.0



[PATCH RESEND v8 12/16] arch: make execmem setup available regardless of CONFIG_MODULES

2024-05-05 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

execmem does not depend on modules, on the contrary modules use
execmem.

To make execmem available when CONFIG_MODULES=n, for instance for
kprobes, split execmem_params initialization out from
arch/*/kernel/module.c and compile it when CONFIG_EXECMEM=y

Signed-off-by: Mike Rapoport (IBM) 
Reviewed-by: Philippe Mathieu-Daudé 
---
 arch/arm/kernel/module.c   |  43 --
 arch/arm/mm/init.c |  45 +++
 arch/arm64/kernel/module.c | 140 -
 arch/arm64/mm/init.c   | 140 +
 arch/loongarch/kernel/module.c |  19 -
 arch/loongarch/mm/init.c   |  21 +
 arch/mips/kernel/module.c  |  22 --
 arch/mips/mm/init.c|  23 ++
 arch/nios2/kernel/module.c |  20 -
 arch/nios2/mm/init.c   |  21 +
 arch/parisc/kernel/module.c|  20 -
 arch/parisc/mm/init.c  |  23 +-
 arch/powerpc/kernel/module.c   |  63 ---
 arch/powerpc/mm/mem.c  |  64 +++
 arch/riscv/kernel/module.c |  34 
 arch/riscv/mm/init.c   |  35 +
 arch/s390/kernel/module.c  |  27 ---
 arch/s390/mm/init.c|  30 +++
 arch/sparc/kernel/module.c |  19 -
 arch/sparc/mm/Makefile |   2 +
 arch/sparc/mm/execmem.c|  21 +
 arch/x86/kernel/module.c   |  27 ---
 arch/x86/mm/init.c |  29 +++
 23 files changed, 453 insertions(+), 435 deletions(-)
 create mode 100644 arch/sparc/mm/execmem.c

diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c
index a98fdf6ff26c..677f218f7e84 100644
--- a/arch/arm/kernel/module.c
+++ b/arch/arm/kernel/module.c
@@ -12,57 +12,14 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
-#include 
-#include 
 
 #include 
 #include 
 #include 
 #include 
 
-#ifdef CONFIG_XIP_KERNEL
-/*
- * The XIP kernel text is mapped in the module area for modules and
- * some other stuff to work without any indirect relocations.
- * MODULES_VADDR is redefined here and not in asm/memory.h to avoid
- * recompiling the whole kernel when CONFIG_XIP_KERNEL is turned on/off.
- */
-#undef MODULES_VADDR
-#define MODULES_VADDR  (((unsigned long)_exiprom + ~PMD_MASK) & PMD_MASK)
-#endif
-
-#ifdef CONFIG_MMU
-static struct execmem_info execmem_info __ro_after_init;
-
-struct execmem_info __init *execmem_arch_setup(void)
-{
-   unsigned long fallback_start = 0, fallback_end = 0;
-
-   if (IS_ENABLED(CONFIG_ARM_MODULE_PLTS)) {
-   fallback_start = VMALLOC_START;
-   fallback_end = VMALLOC_END;
-   }
-
-   execmem_info = (struct execmem_info){
-   .ranges = {
-   [EXECMEM_DEFAULT] = {
-   .start  = MODULES_VADDR,
-   .end= MODULES_END,
-   .pgprot = PAGE_KERNEL_EXEC,
-   .alignment = 1,
-   .fallback_start = fallback_start,
-   .fallback_end   = fallback_end,
-   },
-   },
-   };
-
-   return &execmem_info;
-}
-#endif
-
 bool module_init_section(const char *name)
 {
return strstarts(name, ".init") ||
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index e8c6f4be0ce1..5345d218899a 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -22,6 +22,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -486,3 +487,47 @@ void free_initrd_mem(unsigned long start, unsigned long 
end)
free_reserved_area((void *)start, (void *)end, -1, "initrd");
 }
 #endif
+
+#ifdef CONFIG_EXECMEM
+
+#ifdef CONFIG_XIP_KERNEL
+/*
+ * The XIP kernel text is mapped in the module area for modules and
+ * some other stuff to work without any indirect relocations.
+ * MODULES_VADDR is redefined here and not in asm/memory.h to avoid
+ * recompiling the whole kernel when CONFIG_XIP_KERNEL is turned on/off.
+ */
+#undef MODULES_VADDR
+#define MODULES_VADDR  (((unsigned long)_exiprom + ~PMD_MASK) & PMD_MASK)
+#endif
+
+#ifdef CONFIG_MMU
+static struct execmem_info execmem_info __ro_after_init;
+
+struct execmem_info __init *execmem_arch_setup(void)
+{
+   unsigned long fallback_start = 0, fallback_end = 0;
+
+   if (IS_ENABLED(CONFIG_ARM_MODULE_PLTS)) {
+   fallback_start = VMALLOC_START;
+   fallback_end = VMALLOC_END;
+   }
+
+   execmem_info = (struct execmem_info){
+   .ranges = {
+   [EXECMEM_DEFAULT] = {
+   .start  = MODULES_VADDR,
+   .end= MODULES_END,
+   .pgprot = PAGE_KERNEL_EXEC,
+   .alignment = 1,
+   .fallback_start = fallback_start,
+   

[PATCH RESEND v8 11/16] powerpc: extend execmem_params for kprobes allocations

2024-05-05 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

powerpc overrides kprobes::alloc_insn_page() to remove writable
permissions when STRICT_MODULE_RWX is on.

Add definition of EXECMEM_KPROBES to execmem_params to allow using the
generic kprobes::alloc_insn_page() with the desired permissions.

As powerpc uses breakpoint instructions to inject kprobes, it does not
need to constrain kprobe allocations to the modules area and can use the
entire vmalloc address space.

Signed-off-by: Mike Rapoport (IBM) 
---
 arch/powerpc/kernel/kprobes.c | 20 
 arch/powerpc/kernel/module.c  |  7 +++
 2 files changed, 7 insertions(+), 20 deletions(-)

diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index 9fcd01bb2ce6..14c5ddec3056 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -126,26 +126,6 @@ kprobe_opcode_t *arch_adjust_kprobe_addr(unsigned long 
addr, unsigned long offse
return (kprobe_opcode_t *)(addr + offset);
 }
 
-void *alloc_insn_page(void)
-{
-   void *page;
-
-   page = execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE);
-   if (!page)
-   return NULL;
-
-   if (strict_module_rwx_enabled()) {
-   int err = set_memory_rox((unsigned long)page, 1);
-
-   if (err)
-   goto error;
-   }
-   return page;
-error:
-   execmem_free(page);
-   return NULL;
-}
-
 int arch_prepare_kprobe(struct kprobe *p)
 {
int ret = 0;
diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c
index ac80559015a3..2a23cf7e141b 100644
--- a/arch/powerpc/kernel/module.c
+++ b/arch/powerpc/kernel/module.c
@@ -94,6 +94,7 @@ static struct execmem_info execmem_info __ro_after_init;
 
 struct execmem_info __init *execmem_arch_setup(void)
 {
+   pgprot_t kprobes_prot = strict_module_rwx_enabled() ? PAGE_KERNEL_ROX : 
PAGE_KERNEL_EXEC;
pgprot_t prot = strict_module_rwx_enabled() ? PAGE_KERNEL : 
PAGE_KERNEL_EXEC;
unsigned long fallback_start = 0, fallback_end = 0;
unsigned long start, end;
@@ -132,6 +133,12 @@ struct execmem_info __init *execmem_arch_setup(void)
.fallback_start = fallback_start,
.fallback_end   = fallback_end,
},
+   [EXECMEM_KPROBES] = {
+   .start  = VMALLOC_START,
+   .end= VMALLOC_END,
+   .pgprot = kprobes_prot,
+   .alignment = 1,
+   },
[EXECMEM_MODULE_DATA] = {
.start  = VMALLOC_START,
.end= VMALLOC_END,
-- 
2.43.0



[PATCH RESEND v8 10/16] arm64: extend execmem_info for generated code allocations

2024-05-05 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

The memory allocations for kprobes and BPF on arm64 can be placed
anywhere in vmalloc address space and currently this is implemented with
overrides of alloc_insn_page() and bpf_jit_alloc_exec() in arm64.

Define EXECMEM_KPROBES and EXECMEM_BPF ranges in arm64::execmem_info and
drop overrides of alloc_insn_page() and bpf_jit_alloc_exec().

Signed-off-by: Mike Rapoport (IBM) 
Acked-by: Will Deacon 
---
 arch/arm64/kernel/module.c | 12 
 arch/arm64/kernel/probes/kprobes.c |  7 ---
 arch/arm64/net/bpf_jit_comp.c  | 11 ---
 3 files changed, 12 insertions(+), 18 deletions(-)

diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index b7a7a23f9f8f..a52240ea084b 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -146,6 +146,18 @@ struct execmem_info __init *execmem_arch_setup(void)
.fallback_start = fallback_start,
.fallback_end   = fallback_end,
},
+   [EXECMEM_KPROBES] = {
+   .start  = VMALLOC_START,
+   .end= VMALLOC_END,
+   .pgprot = PAGE_KERNEL_ROX,
+   .alignment = 1,
+   },
+   [EXECMEM_BPF] = {
+   .start  = VMALLOC_START,
+   .end= VMALLOC_END,
+   .pgprot = PAGE_KERNEL,
+   .alignment = 1,
+   },
},
};
 
diff --git a/arch/arm64/kernel/probes/kprobes.c 
b/arch/arm64/kernel/probes/kprobes.c
index 327855a11df2..4268678d0e86 100644
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -129,13 +129,6 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
return 0;
 }
 
-void *alloc_insn_page(void)
-{
-   return __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, VMALLOC_END,
-   GFP_KERNEL, PAGE_KERNEL_ROX, VM_FLUSH_RESET_PERMS,
-   NUMA_NO_NODE, __builtin_return_address(0));
-}
-
 /* arm kprobe: install breakpoint in text */
 void __kprobes arch_arm_kprobe(struct kprobe *p)
 {
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 122021f9bdfc..456f5af239fc 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -1793,17 +1793,6 @@ u64 bpf_jit_alloc_exec_limit(void)
return VMALLOC_END - VMALLOC_START;
 }
 
-void *bpf_jit_alloc_exec(unsigned long size)
-{
-   /* Memory is intended to be executable, reset the pointer tag. */
-   return kasan_reset_tag(vmalloc(size));
-}
-
-void bpf_jit_free_exec(void *addr)
-{
-   return vfree(addr);
-}
-
 /* Indicate the JIT backend supports mixing bpf2bpf and tailcalls. */
 bool bpf_jit_supports_subprog_tailcalls(void)
 {
-- 
2.43.0



[PATCH RESEND v8 09/16] riscv: extend execmem_params for generated code allocations

2024-05-05 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

The memory allocations for kprobes and BPF on RISC-V are not placed in
the modules area and these custom allocations are implemented with
overrides of alloc_insn_page() and bpf_jit_alloc_exec().

Define MODULES_VADDR and MODULES_END as VMALLOC_START and VMALLOC_END for
32 bit and slightly reorder execmem_params initialization to support both
32 and 64 bit variants, define EXECMEM_KPROBES and EXECMEM_BPF ranges in
riscv::execmem_params and drop overrides of alloc_insn_page() and
bpf_jit_alloc_exec().

Signed-off-by: Mike Rapoport (IBM) 
Reviewed-by: Alexandre Ghiti 
---
 arch/riscv/include/asm/pgtable.h   |  3 +++
 arch/riscv/kernel/module.c | 14 +-
 arch/riscv/kernel/probes/kprobes.c | 10 --
 arch/riscv/net/bpf_jit_core.c  | 13 -
 4 files changed, 16 insertions(+), 24 deletions(-)

diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 9f8ea0e33eb1..5f21814e438e 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -55,6 +55,9 @@
 #define MODULES_LOWEST_VADDR   (KERNEL_LINK_ADDR - SZ_2G)
 #define MODULES_VADDR  (PFN_ALIGN((unsigned long)&_end) - SZ_2G)
 #define MODULES_END(PFN_ALIGN((unsigned long)&_start))
+#else
+#define MODULES_VADDR  VMALLOC_START
+#define MODULES_ENDVMALLOC_END
 #endif
 
 /*
diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c
index 182904127ba0..0e6415f00fca 100644
--- a/arch/riscv/kernel/module.c
+++ b/arch/riscv/kernel/module.c
@@ -906,7 +906,7 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char 
*strtab,
return 0;
 }
 
-#if defined(CONFIG_MMU) && defined(CONFIG_64BIT)
+#ifdef CONFIG_MMU
 static struct execmem_info execmem_info __ro_after_init;
 
 struct execmem_info __init *execmem_arch_setup(void)
@@ -919,6 +919,18 @@ struct execmem_info __init *execmem_arch_setup(void)
.pgprot = PAGE_KERNEL,
.alignment = 1,
},
+   [EXECMEM_KPROBES] = {
+   .start  = VMALLOC_START,
+   .end= VMALLOC_END,
+   .pgprot = PAGE_KERNEL_READ_EXEC,
+   .alignment = 1,
+   },
+   [EXECMEM_BPF] = {
+   .start  = BPF_JIT_REGION_START,
+   .end= BPF_JIT_REGION_END,
+   .pgprot = PAGE_KERNEL,
+   .alignment = PAGE_SIZE,
+   },
},
};
 
diff --git a/arch/riscv/kernel/probes/kprobes.c 
b/arch/riscv/kernel/probes/kprobes.c
index 2f08c14a933d..e64f2f3064eb 100644
--- a/arch/riscv/kernel/probes/kprobes.c
+++ b/arch/riscv/kernel/probes/kprobes.c
@@ -104,16 +104,6 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
return 0;
 }
 
-#ifdef CONFIG_MMU
-void *alloc_insn_page(void)
-{
-   return  __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, VMALLOC_END,
-GFP_KERNEL, PAGE_KERNEL_READ_EXEC,
-VM_FLUSH_RESET_PERMS, NUMA_NO_NODE,
-__builtin_return_address(0));
-}
-#endif
-
 /* install breakpoint in text */
 void __kprobes arch_arm_kprobe(struct kprobe *p)
 {
diff --git a/arch/riscv/net/bpf_jit_core.c b/arch/riscv/net/bpf_jit_core.c
index 6b3acac30c06..e238fdbd5dbc 100644
--- a/arch/riscv/net/bpf_jit_core.c
+++ b/arch/riscv/net/bpf_jit_core.c
@@ -219,19 +219,6 @@ u64 bpf_jit_alloc_exec_limit(void)
return BPF_JIT_REGION_SIZE;
 }
 
-void *bpf_jit_alloc_exec(unsigned long size)
-{
-   return __vmalloc_node_range(size, PAGE_SIZE, BPF_JIT_REGION_START,
-   BPF_JIT_REGION_END, GFP_KERNEL,
-   PAGE_KERNEL, 0, NUMA_NO_NODE,
-   __builtin_return_address(0));
-}
-
-void bpf_jit_free_exec(void *addr)
-{
-   return vfree(addr);
-}
-
 void *bpf_arch_text_copy(void *dst, void *src, size_t len)
 {
int ret;
-- 
2.43.0



[PATCH RESEND v8 08/16] mm/execmem, arch: convert remaining overrides of module_alloc to execmem

2024-05-05 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

Extend execmem parameters to accommodate more complex overrides of
module_alloc() by architectures.

This includes specification of a fallback range required by arm, arm64
and powerpc, EXECMEM_MODULE_DATA type required by powerpc, support for
allocation of KASAN shadow required by s390 and x86 and support for
late initialization of execmem required by arm64.

The core implementation of execmem_alloc() takes care of suppressing
warnings when the initial allocation fails but there is a fallback range
defined.

Signed-off-by: Mike Rapoport (IBM) 
Acked-by: Will Deacon 
Acked-by: Song Liu 
Tested-by: Liviu Dudau 
---
 arch/Kconfig |  8 
 arch/arm/kernel/module.c | 41 
 arch/arm64/Kconfig   |  1 +
 arch/arm64/kernel/module.c   | 55 +++
 arch/powerpc/kernel/module.c | 60 +++--
 arch/s390/kernel/module.c| 54 +++---
 arch/x86/kernel/module.c | 70 +++---
 include/linux/execmem.h  | 30 ++-
 include/linux/moduleloader.h | 12 --
 kernel/module/main.c | 26 +++--
 mm/execmem.c | 74 ++--
 11 files changed, 246 insertions(+), 185 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index 65afb1de48b3..4fd0daa54e6c 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -960,6 +960,14 @@ config ARCH_WANTS_MODULES_DATA_IN_VMALLOC
  For architectures like powerpc/32 which have constraints on module
  allocation and need to allocate module data outside of module area.
 
+config ARCH_WANTS_EXECMEM_LATE
+   bool
+   help
+ For architectures that do not allocate executable memory early on
+ boot, but rather require its initialization late when there is
+ enough entropy for module space randomization, for instance
+ arm64.
+
 config HAVE_IRQ_EXIT_ON_IRQ_STACK
bool
help
diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c
index e74d84f58b77..a98fdf6ff26c 100644
--- a/arch/arm/kernel/module.c
+++ b/arch/arm/kernel/module.c
@@ -16,6 +16,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -34,23 +35,31 @@
 #endif
 
 #ifdef CONFIG_MMU
-void *module_alloc(unsigned long size)
+static struct execmem_info execmem_info __ro_after_init;
+
+struct execmem_info __init *execmem_arch_setup(void)
 {
-   gfp_t gfp_mask = GFP_KERNEL;
-   void *p;
-
-   /* Silence the initial allocation */
-   if (IS_ENABLED(CONFIG_ARM_MODULE_PLTS))
-   gfp_mask |= __GFP_NOWARN;
-
-   p = __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
-   gfp_mask, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
-   __builtin_return_address(0));
-   if (!IS_ENABLED(CONFIG_ARM_MODULE_PLTS) || p)
-   return p;
-   return __vmalloc_node_range(size, 1,  VMALLOC_START, VMALLOC_END,
-   GFP_KERNEL, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
-   __builtin_return_address(0));
+   unsigned long fallback_start = 0, fallback_end = 0;
+
+   if (IS_ENABLED(CONFIG_ARM_MODULE_PLTS)) {
+   fallback_start = VMALLOC_START;
+   fallback_end = VMALLOC_END;
+   }
+
+   execmem_info = (struct execmem_info){
+   .ranges = {
+   [EXECMEM_DEFAULT] = {
+   .start  = MODULES_VADDR,
+   .end= MODULES_END,
+   .pgprot = PAGE_KERNEL_EXEC,
+   .alignment = 1,
+   .fallback_start = fallback_start,
+   .fallback_end   = fallback_end,
+   },
+   },
+   };
+
+   return &execmem_info;
 }
 #endif
 
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 7b11c98b3e84..74b34a78b7ac 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -105,6 +105,7 @@ config ARM64
select ARCH_WANT_FRAME_POINTERS
select ARCH_WANT_HUGE_PMD_SHARE if ARM64_4K_PAGES || (ARM64_16K_PAGES 
&& !ARM64_VA_BITS_36)
select ARCH_WANT_LD_ORPHAN_WARN
+   select ARCH_WANTS_EXECMEM_LATE if EXECMEM
select ARCH_WANTS_NO_INSTR
select ARCH_WANTS_THP_SWAP if ARM64_4K_PAGES
select ARCH_HAS_UBSAN
diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index e92da4da1b2a..b7a7a23f9f8f 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -20,6 +20,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -108,41 +109,47 @@ static int __init module_init_limits(void)
 
return 0;
 }
-subsys_initcall(module_init_limits);
 
-void *module_alloc(unsigned long size)
+static struct execmem_info execmem_info __ro_after_init;
+
+struct e

[PATCH RESEND v8 07/16] mm/execmem, arch: convert simple overrides of module_alloc to execmem

2024-05-05 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

Several architectures override module_alloc() only to define an address
range for code allocations that differs from the VMALLOC address space.

Provide a generic implementation in execmem that uses the parameters for
address space ranges, required alignment and page protections provided
by architectures.

The architectures must fill execmem_info structure and implement
execmem_arch_setup() that returns a pointer to that structure. This way the
execmem initialization won't be called from every architecture, but rather
from a central place, namely a core_initcall() in execmem.

The execmem provides execmem_alloc() API that wraps __vmalloc_node_range()
with the parameters defined by the architectures.  If an architecture does
not implement execmem_arch_setup(), execmem_alloc() will fall back to
module_alloc().

Signed-off-by: Mike Rapoport (IBM) 
Acked-by: Song Liu 
---
 arch/loongarch/kernel/module.c | 19 --
 arch/mips/kernel/module.c  | 20 --
 arch/nios2/kernel/module.c | 21 ---
 arch/parisc/kernel/module.c| 24 
 arch/riscv/kernel/module.c | 24 
 arch/sparc/kernel/module.c | 20 --
 include/linux/execmem.h| 47 
 mm/execmem.c   | 67 --
 mm/mm_init.c   |  2 +
 9 files changed, 210 insertions(+), 34 deletions(-)

diff --git a/arch/loongarch/kernel/module.c b/arch/loongarch/kernel/module.c
index c7d0338d12c1..ca6dd7ea1610 100644
--- a/arch/loongarch/kernel/module.c
+++ b/arch/loongarch/kernel/module.c
@@ -18,6 +18,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -490,10 +491,22 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char 
*strtab,
return 0;
 }
 
-void *module_alloc(unsigned long size)
+static struct execmem_info execmem_info __ro_after_init;
+
+struct execmem_info __init *execmem_arch_setup(void)
 {
-   return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
-   GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE, 
__builtin_return_address(0));
+   execmem_info = (struct execmem_info){
+   .ranges = {
+   [EXECMEM_DEFAULT] = {
+   .start  = MODULES_VADDR,
+   .end= MODULES_END,
+   .pgprot = PAGE_KERNEL,
+   .alignment = 1,
+   },
+   },
+   };
+
+   return &execmem_info;
 }
 
 static void module_init_ftrace_plt(const Elf_Ehdr *hdr,
diff --git a/arch/mips/kernel/module.c b/arch/mips/kernel/module.c
index 9a6c96014904..59225a3cf918 100644
--- a/arch/mips/kernel/module.c
+++ b/arch/mips/kernel/module.c
@@ -20,6 +20,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 struct mips_hi16 {
@@ -32,11 +33,22 @@ static LIST_HEAD(dbe_list);
 static DEFINE_SPINLOCK(dbe_lock);
 
 #ifdef MODULES_VADDR
-void *module_alloc(unsigned long size)
+static struct execmem_info execmem_info __ro_after_init;
+
+struct execmem_info __init *execmem_arch_setup(void)
 {
-   return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
-   GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE,
-   __builtin_return_address(0));
+   execmem_info = (struct execmem_info){
+   .ranges = {
+   [EXECMEM_DEFAULT] = {
+   .start  = MODULES_VADDR,
+   .end= MODULES_END,
+   .pgprot = PAGE_KERNEL,
+   .alignment = 1,
+   },
+   },
+   };
+
+   return &execmem_info;
 }
 #endif
 
diff --git a/arch/nios2/kernel/module.c b/arch/nios2/kernel/module.c
index 9c97b7513853..0d1ee86631fc 100644
--- a/arch/nios2/kernel/module.c
+++ b/arch/nios2/kernel/module.c
@@ -18,15 +18,26 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 
-void *module_alloc(unsigned long size)
+static struct execmem_info execmem_info __ro_after_init;
+
+struct execmem_info __init *execmem_arch_setup(void)
 {
-   return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
-   GFP_KERNEL, PAGE_KERNEL_EXEC,
-   VM_FLUSH_RESET_PERMS, NUMA_NO_NODE,
-   __builtin_return_address(0));
+   execmem_info = (struct execmem_info){
+   .ranges = {
+   [EXECMEM_DEFAULT] = {
+   .start  = MODULES_VADDR,
+   .end= MODULES_END,
+   .pgprot = PAGE_KERNEL_EXEC,
+   .alignment = 1,
+   },
+   },
+   };
+
+   return &execmem_info;
 }
 
 int apply_relocate_add(Elf32_Shdr *sechdrs, const char *strtab,
diff --git a/arch/par

[PATCH RESEND v8 06/16] mm: introduce execmem_alloc() and execmem_free()

2024-05-05 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

module_alloc() is used everywhere as a means to allocate memory for code.

Besides being semantically wrong, this unnecessarily ties all subsystems
that need to allocate code, such as ftrace, kprobes and BPF to modules and
puts the burden of code allocation to the modules code.

Several architectures override module_alloc() because of various
constraints on where the executable memory can be located and this causes
additional obstacles for improvements of code allocation.

Start splitting code allocation from modules by introducing execmem_alloc()
and execmem_free() APIs.

Initially, execmem_alloc() is a wrapper for module_alloc() and
execmem_free() is a replacement of module_memfree() to allow updating all
call sites to use the new APIs.

Since architectures define different restrictions on placement,
permissions, alignment and other parameters for memory that can be used by
different subsystems that allocate executable memory, execmem_alloc() takes
a type argument, that will be used to identify the calling subsystem and to
allow architectures to define parameters for ranges suitable for that
subsystem.

No functional changes.

Signed-off-by: Mike Rapoport (IBM) 
Acked-by: Masami Hiramatsu (Google) 
Acked-by: Song Liu 
---
 arch/powerpc/kernel/kprobes.c|  6 ++--
 arch/s390/kernel/ftrace.c|  4 +--
 arch/s390/kernel/kprobes.c   |  4 +--
 arch/s390/kernel/module.c|  5 +--
 arch/sparc/net/bpf_jit_comp_32.c |  8 ++---
 arch/x86/kernel/ftrace.c |  6 ++--
 arch/x86/kernel/kprobes/core.c   |  4 +--
 include/linux/execmem.h  | 57 
 include/linux/moduleloader.h |  3 --
 kernel/bpf/core.c|  6 ++--
 kernel/kprobes.c |  8 ++---
 kernel/module/Kconfig|  1 +
 kernel/module/main.c | 25 +-
 mm/Kconfig   |  3 ++
 mm/Makefile  |  1 +
 mm/execmem.c | 32 ++
 16 files changed, 128 insertions(+), 45 deletions(-)
 create mode 100644 include/linux/execmem.h
 create mode 100644 mm/execmem.c

diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index bbca90a5e2ec..9fcd01bb2ce6 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -19,8 +19,8 @@
 #include 
 #include 
 #include 
-#include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -130,7 +130,7 @@ void *alloc_insn_page(void)
 {
void *page;
 
-   page = module_alloc(PAGE_SIZE);
+   page = execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE);
if (!page)
return NULL;
 
@@ -142,7 +142,7 @@ void *alloc_insn_page(void)
}
return page;
 error:
-   module_memfree(page);
+   execmem_free(page);
return NULL;
 }
 
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index c46381ea04ec..798249ef5646 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -7,13 +7,13 @@
  *   Author(s): Martin Schwidefsky 
  */
 
-#include 
 #include 
 #include 
 #include 
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -220,7 +220,7 @@ static int __init ftrace_plt_init(void)
 {
const char *start, *end;
 
-   ftrace_plt = module_alloc(PAGE_SIZE);
+   ftrace_plt = execmem_alloc(EXECMEM_FTRACE, PAGE_SIZE);
if (!ftrace_plt)
panic("cannot allocate ftrace plt\n");
 
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index f0cf20d4b3c5..3c1b1be744de 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -9,7 +9,6 @@
 
 #define pr_fmt(fmt) "kprobes: " fmt
 
-#include 
 #include 
 #include 
 #include 
@@ -21,6 +20,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -38,7 +38,7 @@ void *alloc_insn_page(void)
 {
void *page;
 
-   page = module_alloc(PAGE_SIZE);
+   page = execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE);
if (!page)
return NULL;
set_memory_rox((unsigned long)page, 1);
diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c
index 42215f9404af..ac97a905e8cd 100644
--- a/arch/s390/kernel/module.c
+++ b/arch/s390/kernel/module.c
@@ -21,6 +21,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -76,7 +77,7 @@ void *module_alloc(unsigned long size)
 #ifdef CONFIG_FUNCTION_TRACER
 void module_arch_cleanup(struct module *mod)
 {
-   module_memfree(mod->arch.trampolines_start);
+   execmem_free(mod->arch.trampolines_start);
 }
 #endif
 
@@ -510,7 +511,7 @@ static int module_alloc_ftrace_hotpatch_trampolines(struct 
module *me,
 
size = FTRACE_HOTPATCH_TRAMPOLINES_SIZE(s->sh_size);
numpages = DIV_ROUND_UP(size, PAGE_SIZE);
-   start = module_alloc(numpages * PAGE_SIZE);
+   start = execmem_alloc(EXECMEM_FTRACE, nu

[PATCH RESEND v8 05/16] module: make module_memory_{alloc,free} more self-contained

2024-05-05 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

Move the logic related to the memory allocation and freeing into
module_memory_alloc() and module_memory_free().

Signed-off-by: Mike Rapoport (IBM) 
Reviewed-by: Philippe Mathieu-Daudé 
---
 kernel/module/main.c | 64 +++-
 1 file changed, 39 insertions(+), 25 deletions(-)

diff --git a/kernel/module/main.c b/kernel/module/main.c
index e1e8a7a9d6c1..5b82b069e0d3 100644
--- a/kernel/module/main.c
+++ b/kernel/module/main.c
@@ -1203,15 +1203,44 @@ static bool mod_mem_use_vmalloc(enum mod_mem_type type)
mod_mem_type_is_core_data(type);
 }
 
-static void *module_memory_alloc(unsigned int size, enum mod_mem_type type)
+static int module_memory_alloc(struct module *mod, enum mod_mem_type type)
 {
+   unsigned int size = PAGE_ALIGN(mod->mem[type].size);
+   void *ptr;
+
+   mod->mem[type].size = size;
+
if (mod_mem_use_vmalloc(type))
-   return vzalloc(size);
-   return module_alloc(size);
+   ptr = vmalloc(size);
+   else
+   ptr = module_alloc(size);
+
+   if (!ptr)
+   return -ENOMEM;
+
+   /*
+* The pointer to these blocks of memory are stored on the module
+* structure and we keep that around so long as the module is
+* around. We only free that memory when we unload the module.
+* Just mark them as not being a leak then. The .init* ELF
+* sections *do* get freed after boot so we *could* treat them
+* slightly differently with kmemleak_ignore() and only grey
+* them out as they work as typical memory allocations which
+* *do* eventually get freed, but let's just keep things simple
+* and avoid *any* false positives.
+*/
+   kmemleak_not_leak(ptr);
+
+   memset(ptr, 0, size);
+   mod->mem[type].base = ptr;
+
+   return 0;
 }
 
-static void module_memory_free(void *ptr, enum mod_mem_type type)
+static void module_memory_free(struct module *mod, enum mod_mem_type type)
 {
+   void *ptr = mod->mem[type].base;
+
if (mod_mem_use_vmalloc(type))
vfree(ptr);
else
@@ -1229,12 +1258,12 @@ static void free_mod_mem(struct module *mod)
/* Free lock-classes; relies on the preceding sync_rcu(). */
lockdep_free_key_range(mod_mem->base, mod_mem->size);
if (mod_mem->size)
-   module_memory_free(mod_mem->base, type);
+   module_memory_free(mod, type);
}
 
/* MOD_DATA hosts mod, so free it at last */
lockdep_free_key_range(mod->mem[MOD_DATA].base, 
mod->mem[MOD_DATA].size);
-   module_memory_free(mod->mem[MOD_DATA].base, MOD_DATA);
+   module_memory_free(mod, MOD_DATA);
 }
 
 /* Free a module, remove from lists, etc. */
@@ -2225,7 +2254,6 @@ static int find_module_sections(struct module *mod, 
struct load_info *info)
 static int move_module(struct module *mod, struct load_info *info)
 {
int i;
-   void *ptr;
enum mod_mem_type t = 0;
int ret = -ENOMEM;
 
@@ -2234,26 +2262,12 @@ static int move_module(struct module *mod, struct 
load_info *info)
mod->mem[type].base = NULL;
continue;
}
-   mod->mem[type].size = PAGE_ALIGN(mod->mem[type].size);
-   ptr = module_memory_alloc(mod->mem[type].size, type);
-   /*
- * The pointer to these blocks of memory are stored on the 
module
- * structure and we keep that around so long as the module is
- * around. We only free that memory when we unload the module.
- * Just mark them as not being a leak then. The .init* ELF
- * sections *do* get freed after boot so we *could* treat them
- * slightly differently with kmemleak_ignore() and only grey
- * them out as they work as typical memory allocations which
- * *do* eventually get freed, but let's just keep things simple
- * and avoid *any* false positives.
-*/
-   kmemleak_not_leak(ptr);
-   if (!ptr) {
+
+   ret = module_memory_alloc(mod, type);
+   if (ret) {
t = type;
goto out_enomem;
}
-   memset(ptr, 0, mod->mem[type].size);
-   mod->mem[type].base = ptr;
}
 
/* Transfer each section which specifies SHF_ALLOC */
@@ -2296,7 +2310,7 @@ static int move_module(struct module *mod, struct 
load_info *info)
return 0;
 out_enomem:
for (t--; t >= 0; t--)
-   module_memory_free(mod->mem[t].base, t);
+   module_memory_free(mod, t);
return ret;
 }
 
-- 
2.43.0



[PATCH RESEND v8 04/16] sparc: simplify module_alloc()

2024-05-05 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

Define MODULES_VADDR and MODULES_END as VMALLOC_START and VMALLOC_END
for 32-bit and reduce module_alloc() to

__vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, ...)

as with the new defines the allocations become identical for both 32
and 64 bits.

While on it, drop unused include of <linux/jump_label.h>

Suggested-by: Sam Ravnborg 
Signed-off-by: Mike Rapoport (IBM) 
Reviewed-by: Sam Ravnborg 
---
 arch/sparc/include/asm/pgtable_32.h |  2 ++
 arch/sparc/kernel/module.c  | 25 +
 2 files changed, 3 insertions(+), 24 deletions(-)

diff --git a/arch/sparc/include/asm/pgtable_32.h 
b/arch/sparc/include/asm/pgtable_32.h
index 9e85d57ac3f2..62bcafe38b1f 100644
--- a/arch/sparc/include/asm/pgtable_32.h
+++ b/arch/sparc/include/asm/pgtable_32.h
@@ -432,6 +432,8 @@ static inline int io_remap_pfn_range(struct vm_area_struct 
*vma,
 
 #define VMALLOC_START   _AC(0xfe60,UL)
 #define VMALLOC_END _AC(0xffc0,UL)
+#define MODULES_VADDR   VMALLOC_START
+#define MODULES_END VMALLOC_END
 
 /* We provide our own get_unmapped_area to cope with VA holes for userland */
 #define HAVE_ARCH_UNMAPPED_AREA
diff --git a/arch/sparc/kernel/module.c b/arch/sparc/kernel/module.c
index 66c45a2764bc..d37adb2a0b54 100644
--- a/arch/sparc/kernel/module.c
+++ b/arch/sparc/kernel/module.c
@@ -21,35 +21,12 @@
 
 #include "entry.h"
 
-#ifdef CONFIG_SPARC64
-
-#include 
-
-static void *module_map(unsigned long size)
+void *module_alloc(unsigned long size)
 {
-   if (PAGE_ALIGN(size) > MODULES_LEN)
-   return NULL;
return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE,
__builtin_return_address(0));
 }
-#else
-static void *module_map(unsigned long size)
-{
-   return vmalloc(size);
-}
-#endif /* CONFIG_SPARC64 */
-
-void *module_alloc(unsigned long size)
-{
-   void *ret;
-
-   ret = module_map(size);
-   if (ret)
-   memset(ret, 0, size);
-
-   return ret;
-}
 
 /* Make generic code ignore STT_REGISTER dummy undefined symbols.  */
 int module_frob_arch_sections(Elf_Ehdr *hdr,
-- 
2.43.0



[PATCH RESEND v8 03/16] nios2: define virtual address space for modules

2024-05-05 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

nios2 uses kmalloc() to implement module_alloc() because CALL26/PCREL26
cannot reach all of vmalloc address space.

Define module space as 32MiB below the kernel base and switch nios2 to
use vmalloc for module allocations.

Suggested-by: Thomas Gleixner 
Acked-by: Dinh Nguyen 
Acked-by: Song Liu 
Signed-off-by: Mike Rapoport (IBM) 
---
 arch/nios2/include/asm/pgtable.h |  5 -
 arch/nios2/kernel/module.c   | 19 ---
 2 files changed, 8 insertions(+), 16 deletions(-)

diff --git a/arch/nios2/include/asm/pgtable.h b/arch/nios2/include/asm/pgtable.h
index d052dfcbe8d3..eab87c6beacb 100644
--- a/arch/nios2/include/asm/pgtable.h
+++ b/arch/nios2/include/asm/pgtable.h
@@ -25,7 +25,10 @@
 #include 
 
 #define VMALLOC_START  CONFIG_NIOS2_KERNEL_MMU_REGION_BASE
-#define VMALLOC_END(CONFIG_NIOS2_KERNEL_REGION_BASE - 1)
+#define VMALLOC_END(CONFIG_NIOS2_KERNEL_REGION_BASE - SZ_32M - 1)
+
+#define MODULES_VADDR  (CONFIG_NIOS2_KERNEL_REGION_BASE - SZ_32M)
+#define MODULES_END(CONFIG_NIOS2_KERNEL_REGION_BASE - 1)
 
 struct mm_struct;
 
diff --git a/arch/nios2/kernel/module.c b/arch/nios2/kernel/module.c
index 76e0a42d6e36..9c97b7513853 100644
--- a/arch/nios2/kernel/module.c
+++ b/arch/nios2/kernel/module.c
@@ -21,23 +21,12 @@
 
 #include 
 
-/*
- * Modules should NOT be allocated with kmalloc for (obvious) reasons.
- * But we do it for now to avoid relocation issues. CALL26/PCREL26 cannot reach
- * from 0x8000 (vmalloc area) to 0xc (kernel) (kmalloc returns
- * addresses in 0xc000)
- */
 void *module_alloc(unsigned long size)
 {
-   if (size == 0)
-   return NULL;
-   return kmalloc(size, GFP_KERNEL);
-}
-
-/* Free memory returned from module_alloc */
-void module_memfree(void *module_region)
-{
-   kfree(module_region);
+   return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
+   GFP_KERNEL, PAGE_KERNEL_EXEC,
+   VM_FLUSH_RESET_PERMS, NUMA_NO_NODE,
+   __builtin_return_address(0));
 }
 
 int apply_relocate_add(Elf32_Shdr *sechdrs, const char *strtab,
-- 
2.43.0



[PATCH RESEND v8 02/16] mips: module: rename MODULE_START to MODULES_VADDR

2024-05-05 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

and MODULE_END to MODULES_END to match other architectures that define
custom address space for modules.

Signed-off-by: Mike Rapoport (IBM) 
---
 arch/mips/include/asm/pgtable-64.h | 4 ++--
 arch/mips/kernel/module.c  | 4 ++--
 arch/mips/mm/fault.c   | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/mips/include/asm/pgtable-64.h 
b/arch/mips/include/asm/pgtable-64.h
index 20ca48c1b606..c0109aff223b 100644
--- a/arch/mips/include/asm/pgtable-64.h
+++ b/arch/mips/include/asm/pgtable-64.h
@@ -147,8 +147,8 @@
 #if defined(CONFIG_MODULES) && defined(KBUILD_64BIT_SYM32) && \
VMALLOC_START != CKSSEG
 /* Load modules into 32bit-compatible segment. */
-#define MODULE_START   CKSSEG
-#define MODULE_END (FIXADDR_START-2*PAGE_SIZE)
+#define MODULES_VADDR  CKSSEG
+#define MODULES_END(FIXADDR_START-2*PAGE_SIZE)
 #endif
 
 #define pte_ERROR(e) \
diff --git a/arch/mips/kernel/module.c b/arch/mips/kernel/module.c
index 7b2fbaa9cac5..9a6c96014904 100644
--- a/arch/mips/kernel/module.c
+++ b/arch/mips/kernel/module.c
@@ -31,10 +31,10 @@ struct mips_hi16 {
 static LIST_HEAD(dbe_list);
 static DEFINE_SPINLOCK(dbe_lock);
 
-#ifdef MODULE_START
+#ifdef MODULES_VADDR
 void *module_alloc(unsigned long size)
 {
-   return __vmalloc_node_range(size, 1, MODULE_START, MODULE_END,
+   return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE,
__builtin_return_address(0));
 }
diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c
index aaa9a242ebba..37fedeaca2e9 100644
--- a/arch/mips/mm/fault.c
+++ b/arch/mips/mm/fault.c
@@ -83,8 +83,8 @@ static void __do_page_fault(struct pt_regs *regs, unsigned 
long write,
 
if (unlikely(address >= VMALLOC_START && address <= VMALLOC_END))
goto VMALLOC_FAULT_TARGET;
-#ifdef MODULE_START
-   if (unlikely(address >= MODULE_START && address < MODULE_END))
+#ifdef MODULES_VADDR
+   if (unlikely(address >= MODULES_VADDR && address < MODULES_END))
goto VMALLOC_FAULT_TARGET;
 #endif
 
-- 
2.43.0



[PATCH RESEND v8 01/16] arm64: module: remove unneeded call to kasan_alloc_module_shadow()

2024-05-05 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

Since commit f6f37d9320a1 ("arm64: select KASAN_VMALLOC for SW/HW_TAGS
modes") KASAN_VMALLOC is always enabled when KASAN is on. This means
that allocations in module_alloc() will be tracked by KASAN protection
for vmalloc() and that kasan_alloc_module_shadow() will be always an
empty inline and there is no point in calling it.

Drop meaningless call to kasan_alloc_module_shadow() from
module_alloc().

Signed-off-by: Mike Rapoport (IBM) 
---
 arch/arm64/kernel/module.c | 5 -
 1 file changed, 5 deletions(-)

diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index 47e0be610bb6..e92da4da1b2a 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -141,11 +141,6 @@ void *module_alloc(unsigned long size)
__func__);
}
 
-   if (p && (kasan_alloc_module_shadow(p, size, GFP_KERNEL) < 0)) {
-   vfree(p);
-   return NULL;
-   }
-
/* Memory is intended to be executable, reset the pointer tag. */
return kasan_reset_tag(p);
 }
-- 
2.43.0



[PATCH RESEND v8 00/16] mm: jit/text allocator

2024-05-05 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

Hi,

The patches are also available in git:
https://git.kernel.org/pub/scm/linux/kernel/git/rppt/linux.git/log/?h=execmem/v8

v8:
* fix initialization of default_execmem_info

v7: https://lore.kernel.org/all/20240429121620.1186447-1-r...@kernel.org
* define MODULES_{VADDR,END} for riscv32 to fix the build and avoid
  #ifdefs in a function body
* add Acks, thanks everybody

v6: https://lore.kernel.org/all/20240426082854.7355-1-r...@kernel.org
* restore patch "arm64: extend execmem_info for generated code
  allocations" that disappeared in v5 rebase
* update execmem initialization so that by default it will be
  initialized early while late initialization will be an opt-in

v5: https://lore.kernel.org/all/20240422094436.3625171-1-r...@kernel.org
* rebase on v6.9-rc4 to avoid a conflict in kprobes
* add copyrights to mm/execmem.c (Luis)
* fix spelling (Ingo)
* define MODULES_VADDR for sparc (Sam)
* consistently initialize struct execmem_info (Peter)
* reduce #ifdefs in function bodies in kprobes (Masami) 

v4: https://lore.kernel.org/all/20240411160051.2093261-1-r...@kernel.org
* rebase on v6.9-rc2
* rename execmem_params to execmem_info and execmem_arch_params() to
  execmem_arch_setup()
* use single execmem_alloc() API instead of execmem_{text,data}_alloc() (Song)
* avoid extra copy of execmem parameters (Rick)
* run execmem_init() as core_initcall() except for the architectures that
  may allocate text really early (currently only x86) (Will)
* add acks for some of arm64 and riscv changes, thanks Will and Alexandre
* new commits:
  - drop call to kasan_alloc_module_shadow() on arm64 because it's not
needed anymore
  - rename MODULE_START to MODULES_VADDR on MIPS
  - use CONFIG_EXECMEM instead of CONFIG_MODULES on powerpc as per Christophe:
https://lore.kernel.org/all/79062fa3-3402-47b3-8920-9231ad05e...@csgroup.eu/

v3: https://lore.kernel.org/all/20230918072955.2507221-1-r...@kernel.org
* add type parameter to execmem allocation APIs
* remove BPF dependency on modules

v2: https://lore.kernel.org/all/20230616085038.4121892-1-r...@kernel.org
* Separate "module" and "others" allocations with execmem_text_alloc()
and jit_text_alloc()
* Drop ROX entailment on x86
* Add ack for nios2 changes, thanks Dinh Nguyen

v1: https://lore.kernel.org/all/20230601101257.530867-1-r...@kernel.org

= Cover letter from v1 (slightly updated) =

module_alloc() is used everywhere as a means to allocate memory for code.

Besides being semantically wrong, this unnecessarily ties all subsystems
that need to allocate code, such as ftrace, kprobes and BPF to modules and
puts the burden of code allocation on the modules code.

Several architectures override module_alloc() because of various
constraints on where the executable memory can be located and this causes
additional obstacles for improvements of code allocation.

A centralized infrastructure for code allocation allows allocations of
executable memory as ROX, and future optimizations such as caching large
pages for better iTLB performance and providing sub-page allocations for
users that only need small jit code snippets.

Rick Edgecombe proposed perm_alloc extension to vmalloc [1] and Song Liu
proposed execmem_alloc [2], but both these approaches were targeting BPF
allocations and lacked the ground work to abstract executable allocations
and split them from the modules core.

Thomas Gleixner suggested to express module allocation restrictions and
requirements as struct mod_alloc_type_params [3] that would define ranges,
protections and other parameters for different types of allocations used by
modules and following that suggestion Song separated allocations of
different types in modules (commit ac3b43283923 ("module: replace
module_layout with module_memory")) and posted "Type aware module
allocator" set [4].

I liked the idea of parametrising code allocation requirements as a
structure, but I believe the original proposal and Song's module allocator
was too module centric, so I came up with these patches.

This set splits code allocation from modules by introducing execmem_alloc()
and execmem_free() APIs, replaces call sites of module_alloc() and
module_memfree() with the new APIs and implements core text and related
allocations in a central place.

Instead of architecture specific overrides for module_alloc(), the
architectures that require non-default behaviour for text allocation must
fill execmem_info structure and implement execmem_arch_setup() that returns
a pointer to that structure. If an architecture does not implement
execmem_arch_setup(), the defaults compatible with the current
modules::module_alloc() are used.

Since architectures define different restrictions on placement,
permissions, alignment and other parameters for memory that can be used by
different subsystems that allocate executable memory, execmem APIs
take a type argument, that will be used to identif

[PATCH v8 17/17] fixup: convert remaining archs: defaults handling

2024-05-05 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

Signed-off-by: Mike Rapoport (IBM) 
---
 mm/execmem.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/mm/execmem.c b/mm/execmem.c
index f6dc3fabc1ca..0c4b36bc6d10 100644
--- a/mm/execmem.c
+++ b/mm/execmem.c
@@ -118,7 +118,6 @@ static void __init __execmem_init(void)
info->ranges[EXECMEM_DEFAULT].end = VMALLOC_END;
info->ranges[EXECMEM_DEFAULT].pgprot = PAGE_KERNEL_EXEC;
info->ranges[EXECMEM_DEFAULT].alignment = 1;
-   return;
}
 
if (!execmem_validate(info))
-- 
2.43.0



[PATCH v8 16/17] bpf: remove CONFIG_BPF_JIT dependency on CONFIG_MODULES of

2024-05-05 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

BPF just-in-time compiler depended on CONFIG_MODULES because it used
module_alloc() to allocate memory for the generated code.

Since code allocations are now implemented with execmem, drop dependency of
CONFIG_BPF_JIT on CONFIG_MODULES and make it select CONFIG_EXECMEM.

Suggested-by: Björn Töpel 
Signed-off-by: Mike Rapoport (IBM) 
---
 kernel/bpf/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/bpf/Kconfig b/kernel/bpf/Kconfig
index bc25f5098a25..f999e4e0b344 100644
--- a/kernel/bpf/Kconfig
+++ b/kernel/bpf/Kconfig
@@ -43,7 +43,7 @@ config BPF_JIT
bool "Enable BPF Just In Time compiler"
depends on BPF
depends on HAVE_CBPF_JIT || HAVE_EBPF_JIT
-   depends on MODULES
+   select EXECMEM
help
  BPF programs are normally handled by a BPF interpreter. This option
  allows the kernel to generate native code when a program is loaded
-- 
2.43.0



[PATCH v8 15/17] kprobes: remove dependency on CONFIG_MODULES

2024-05-05 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

kprobes depended on CONFIG_MODULES because it has to allocate memory for
code.

Since code allocations are now implemented with execmem, kprobes can be
enabled in non-modular kernels.

Add #ifdef CONFIG_MODULES guards for the code dealing with kprobes inside
modules, make CONFIG_KPROBES select CONFIG_EXECMEM and drop the
dependency of CONFIG_KPROBES on CONFIG_MODULES.

Signed-off-by: Mike Rapoport (IBM) 
Acked-by: Masami Hiramatsu (Google) 
---
 arch/Kconfig|  2 +-
 include/linux/module.h  |  9 ++
 kernel/kprobes.c| 55 +++--
 kernel/trace/trace_kprobe.c | 20 +-
 4 files changed, 63 insertions(+), 23 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index 4fd0daa54e6c..caa459964f09 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -52,9 +52,9 @@ config GENERIC_ENTRY
 
 config KPROBES
bool "Kprobes"
-   depends on MODULES
depends on HAVE_KPROBES
select KALLSYMS
+   select EXECMEM
select TASKS_RCU if PREEMPTION
help
  Kprobes allows you to trap at almost any kernel address and
diff --git a/include/linux/module.h b/include/linux/module.h
index 1153b0d99a80..ffa1c603163c 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -605,6 +605,11 @@ static inline bool module_is_live(struct module *mod)
return mod->state != MODULE_STATE_GOING;
 }
 
+static inline bool module_is_coming(struct module *mod)
+{
+return mod->state == MODULE_STATE_COMING;
+}
+
 struct module *__module_text_address(unsigned long addr);
 struct module *__module_address(unsigned long addr);
 bool is_module_address(unsigned long addr);
@@ -857,6 +862,10 @@ void *dereference_module_function_descriptor(struct module 
*mod, void *ptr)
return ptr;
 }
 
+static inline bool module_is_coming(struct module *mod)
+{
+   return false;
+}
 #endif /* CONFIG_MODULES */
 
 #ifdef CONFIG_SYSFS
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index ddd7cdc16edf..ca2c6cbd42d2 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1588,7 +1588,7 @@ static int check_kprobe_address_safe(struct kprobe *p,
}
 
/* Get module refcount and reject __init functions for loaded modules. 
*/
-   if (*probed_mod) {
+   if (IS_ENABLED(CONFIG_MODULES) && *probed_mod) {
/*
 * We must hold a refcount of the probed module while updating
 * its code to prohibit unexpected unloading.
@@ -1603,12 +1603,13 @@ static int check_kprobe_address_safe(struct kprobe *p,
 * kprobes in there.
 */
if (within_module_init((unsigned long)p->addr, *probed_mod) &&
-   (*probed_mod)->state != MODULE_STATE_COMING) {
+   !module_is_coming(*probed_mod)) {
module_put(*probed_mod);
*probed_mod = NULL;
ret = -ENOENT;
}
}
+
 out:
preempt_enable();
jump_label_unlock();
@@ -2488,24 +2489,6 @@ int kprobe_add_area_blacklist(unsigned long start, 
unsigned long end)
return 0;
 }
 
-/* Remove all symbols in given area from kprobe blacklist */
-static void kprobe_remove_area_blacklist(unsigned long start, unsigned long 
end)
-{
-   struct kprobe_blacklist_entry *ent, *n;
-
-   list_for_each_entry_safe(ent, n, _blacklist, list) {
-   if (ent->start_addr < start || ent->start_addr >= end)
-   continue;
-   list_del(>list);
-   kfree(ent);
-   }
-}
-
-static void kprobe_remove_ksym_blacklist(unsigned long entry)
-{
-   kprobe_remove_area_blacklist(entry, entry + 1);
-}
-
 int __weak arch_kprobe_get_kallsym(unsigned int *symnum, unsigned long *value,
   char *type, char *sym)
 {
@@ -2570,6 +2553,25 @@ static int __init populate_kprobe_blacklist(unsigned 
long *start,
return ret ? : arch_populate_kprobe_blacklist();
 }
 
+#ifdef CONFIG_MODULES
+/* Remove all symbols in given area from kprobe blacklist */
+static void kprobe_remove_area_blacklist(unsigned long start, unsigned long 
end)
+{
+   struct kprobe_blacklist_entry *ent, *n;
+
+   list_for_each_entry_safe(ent, n, _blacklist, list) {
+   if (ent->start_addr < start || ent->start_addr >= end)
+   continue;
+   list_del(>list);
+   kfree(ent);
+   }
+}
+
+static void kprobe_remove_ksym_blacklist(unsigned long entry)
+{
+   kprobe_remove_area_blacklist(entry, entry + 1);
+}
+
 static void add_module_kprobe_blacklist(struct module *mod)
 {
unsigned long start, end;
@@ -2672,6 +2674,17 @@ static struct notifier_block kprobe_module_nb = {
.priority = 0
 };
 
+static int kprobe_register_module_notifier(void)

[PATCH v8 14/17] powerpc: use CONFIG_EXECMEM instead of CONFIG_MODULES where appropriate

2024-05-05 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

There are places where CONFIG_MODULES guards the code that depends on
memory allocation being done with module_alloc().

Replace CONFIG_MODULES with CONFIG_EXECMEM in such places.

Signed-off-by: Mike Rapoport (IBM) 
---
 arch/powerpc/Kconfig | 2 +-
 arch/powerpc/include/asm/kasan.h | 2 +-
 arch/powerpc/kernel/head_8xx.S   | 4 ++--
 arch/powerpc/kernel/head_book3s_32.S | 6 +++---
 arch/powerpc/lib/code-patching.c | 2 +-
 arch/powerpc/mm/book3s32/mmu.c   | 2 +-
 6 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 1c4be3373686..2e586733a464 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -285,7 +285,7 @@ config PPC
select IOMMU_HELPER if PPC64
select IRQ_DOMAIN
select IRQ_FORCED_THREADING
-   select KASAN_VMALLOCif KASAN && MODULES
+   select KASAN_VMALLOCif KASAN && EXECMEM
select LOCK_MM_AND_FIND_VMA
select MMU_GATHER_PAGE_SIZE
select MMU_GATHER_RCU_TABLE_FREE
diff --git a/arch/powerpc/include/asm/kasan.h b/arch/powerpc/include/asm/kasan.h
index 365d2720097c..b5bbb94c51f6 100644
--- a/arch/powerpc/include/asm/kasan.h
+++ b/arch/powerpc/include/asm/kasan.h
@@ -19,7 +19,7 @@
 
 #define KASAN_SHADOW_SCALE_SHIFT   3
 
-#if defined(CONFIG_MODULES) && defined(CONFIG_PPC32)
+#if defined(CONFIG_EXECMEM) && defined(CONFIG_PPC32)
 #define KASAN_KERN_START   ALIGN_DOWN(PAGE_OFFSET - SZ_256M, SZ_256M)
 #else
 #define KASAN_KERN_START   PAGE_OFFSET
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 647b0b445e89..edc479a7c2bc 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -199,12 +199,12 @@ instruction_counter:
mfspr   r10, SPRN_SRR0  /* Get effective address of fault */
INVALIDATE_ADJACENT_PAGES_CPU15(r10, r11)
mtspr   SPRN_MD_EPN, r10
-#ifdef CONFIG_MODULES
+#ifdef CONFIG_EXECMEM
mfcrr11
compare_to_kernel_boundary r10, r10
 #endif
mfspr   r10, SPRN_M_TWB /* Get level 1 table */
-#ifdef CONFIG_MODULES
+#ifdef CONFIG_EXECMEM
blt+3f
rlwinm  r10, r10, 0, 20, 31
orisr10, r10, (swapper_pg_dir - PAGE_OFFSET)@ha
diff --git a/arch/powerpc/kernel/head_book3s_32.S 
b/arch/powerpc/kernel/head_book3s_32.S
index c1d89764dd22..57196883a00e 100644
--- a/arch/powerpc/kernel/head_book3s_32.S
+++ b/arch/powerpc/kernel/head_book3s_32.S
@@ -419,14 +419,14 @@ InstructionTLBMiss:
  */
/* Get PTE (linux-style) and check access */
mfspr   r3,SPRN_IMISS
-#ifdef CONFIG_MODULES
+#ifdef CONFIG_EXECMEM
lis r1, TASK_SIZE@h /* check if kernel address */
cmplw   0,r1,r3
 #endif
mfspr   r2, SPRN_SDR1
li  r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC
rlwinm  r2, r2, 28, 0xf000
-#ifdef CONFIG_MODULES
+#ifdef CONFIG_EXECMEM
li  r0, 3
bgt-112f
lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha   /* if kernel address, 
use */
@@ -442,7 +442,7 @@ InstructionTLBMiss:
andc.   r1,r1,r2/* check access & ~permission */
bne-InstructionAddressInvalid /* return if access not permitted */
/* Convert linux-style PTE to low word of PPC-style PTE */
-#ifdef CONFIG_MODULES
+#ifdef CONFIG_EXECMEM
rlwimi  r2, r0, 0, 31, 31   /* userspace ? -> PP lsb */
 #endif
ori r1, r1, 0xe06   /* clear out reserved bits */
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index c6ab46156cda..7af791446ddf 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -225,7 +225,7 @@ void __init poking_init(void)
 
 static unsigned long get_patch_pfn(void *addr)
 {
-   if (IS_ENABLED(CONFIG_MODULES) && is_vmalloc_or_module_addr(addr))
+   if (IS_ENABLED(CONFIG_EXECMEM) && is_vmalloc_or_module_addr(addr))
return vmalloc_to_pfn(addr);
else
return __pa_symbol(addr) >> PAGE_SHIFT;
diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c
index 100f999871bc..625fe7d08e06 100644
--- a/arch/powerpc/mm/book3s32/mmu.c
+++ b/arch/powerpc/mm/book3s32/mmu.c
@@ -184,7 +184,7 @@ unsigned long __init mmu_mapin_ram(unsigned long base, 
unsigned long top)
 
 static bool is_module_segment(unsigned long addr)
 {
-   if (!IS_ENABLED(CONFIG_MODULES))
+   if (!IS_ENABLED(CONFIG_EXECMEM))
return false;
if (addr < ALIGN_DOWN(MODULES_VADDR, SZ_256M))
return false;
-- 
2.43.0



[PATCH v8 13/17] x86/ftrace: enable dynamic ftrace without CONFIG_MODULES

2024-05-05 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

Dynamic ftrace must allocate memory for code and this was impossible
without CONFIG_MODULES.

With execmem separated from the modules code, execmem_alloc() is
available regardless of CONFIG_MODULES.

Remove dependency of dynamic ftrace on CONFIG_MODULES and make
CONFIG_DYNAMIC_FTRACE select CONFIG_EXECMEM in Kconfig.

Signed-off-by: Mike Rapoport (IBM) 
---
 arch/x86/Kconfig |  1 +
 arch/x86/kernel/ftrace.c | 10 --
 2 files changed, 1 insertion(+), 10 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 4474bf32d0a4..f2917ccf4fb4 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -34,6 +34,7 @@ config X86_64
select SWIOTLB
select ARCH_HAS_ELFCORE_COMPAT
select ZONE_DMA32
+   select EXECMEM if DYNAMIC_FTRACE
 
 config FORCE_DYNAMIC_FTRACE
def_bool y
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index c8ddb7abda7c..8da0e66ca22d 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -261,8 +261,6 @@ void arch_ftrace_update_code(int command)
 /* Currently only x86_64 supports dynamic trampolines */
 #ifdef CONFIG_X86_64
 
-#ifdef CONFIG_MODULES
-/* Module allocation simplifies allocating memory for code */
 static inline void *alloc_tramp(unsigned long size)
 {
return execmem_alloc(EXECMEM_FTRACE, size);
@@ -271,14 +269,6 @@ static inline void tramp_free(void *tramp)
 {
execmem_free(tramp);
 }
-#else
-/* Trampolines can only be created if modules are supported */
-static inline void *alloc_tramp(unsigned long size)
-{
-   return NULL;
-}
-static inline void tramp_free(void *tramp) { }
-#endif
 
 /* Defined as markers to the end of the ftrace default trampolines */
 extern void ftrace_regs_caller_end(void);
-- 
2.43.0



[PATCH v8 12/17] arch: make execmem setup available regardless of CONFIG_MODULES

2024-05-05 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

execmem does not depend on modules, on the contrary modules use
execmem.

To make execmem available when CONFIG_MODULES=n, for instance for
kprobes, split execmem_params initialization out from
arch/*/kernel/module.c and compile it when CONFIG_EXECMEM=y

Signed-off-by: Mike Rapoport (IBM) 
Reviewed-by: Philippe Mathieu-Daudé 
---
 arch/arm/kernel/module.c   |  43 --
 arch/arm/mm/init.c |  45 +++
 arch/arm64/kernel/module.c | 140 -
 arch/arm64/mm/init.c   | 140 +
 arch/loongarch/kernel/module.c |  19 -
 arch/loongarch/mm/init.c   |  21 +
 arch/mips/kernel/module.c  |  22 --
 arch/mips/mm/init.c|  23 ++
 arch/nios2/kernel/module.c |  20 -
 arch/nios2/mm/init.c   |  21 +
 arch/parisc/kernel/module.c|  20 -
 arch/parisc/mm/init.c  |  23 +-
 arch/powerpc/kernel/module.c   |  63 ---
 arch/powerpc/mm/mem.c  |  64 +++
 arch/riscv/kernel/module.c |  34 
 arch/riscv/mm/init.c   |  35 +
 arch/s390/kernel/module.c  |  27 ---
 arch/s390/mm/init.c|  30 +++
 arch/sparc/kernel/module.c |  19 -
 arch/sparc/mm/Makefile |   2 +
 arch/sparc/mm/execmem.c|  21 +
 arch/x86/kernel/module.c   |  27 ---
 arch/x86/mm/init.c |  29 +++
 23 files changed, 453 insertions(+), 435 deletions(-)
 create mode 100644 arch/sparc/mm/execmem.c

diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c
index a98fdf6ff26c..677f218f7e84 100644
--- a/arch/arm/kernel/module.c
+++ b/arch/arm/kernel/module.c
@@ -12,57 +12,14 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
-#include 
-#include 
 
 #include 
 #include 
 #include 
 #include 
 
-#ifdef CONFIG_XIP_KERNEL
-/*
- * The XIP kernel text is mapped in the module area for modules and
- * some other stuff to work without any indirect relocations.
- * MODULES_VADDR is redefined here and not in asm/memory.h to avoid
- * recompiling the whole kernel when CONFIG_XIP_KERNEL is turned on/off.
- */
-#undef MODULES_VADDR
-#define MODULES_VADDR  (((unsigned long)_exiprom + ~PMD_MASK) & PMD_MASK)
-#endif
-
-#ifdef CONFIG_MMU
-static struct execmem_info execmem_info __ro_after_init;
-
-struct execmem_info __init *execmem_arch_setup(void)
-{
-   unsigned long fallback_start = 0, fallback_end = 0;
-
-   if (IS_ENABLED(CONFIG_ARM_MODULE_PLTS)) {
-   fallback_start = VMALLOC_START;
-   fallback_end = VMALLOC_END;
-   }
-
-   execmem_info = (struct execmem_info){
-   .ranges = {
-   [EXECMEM_DEFAULT] = {
-   .start  = MODULES_VADDR,
-   .end= MODULES_END,
-   .pgprot = PAGE_KERNEL_EXEC,
-   .alignment = 1,
-   .fallback_start = fallback_start,
-   .fallback_end   = fallback_end,
-   },
-   },
-   };
-
-   return _info;
-}
-#endif
-
 bool module_init_section(const char *name)
 {
return strstarts(name, ".init") ||
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index e8c6f4be0ce1..5345d218899a 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -22,6 +22,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -486,3 +487,47 @@ void free_initrd_mem(unsigned long start, unsigned long 
end)
free_reserved_area((void *)start, (void *)end, -1, "initrd");
 }
 #endif
+
+#ifdef CONFIG_EXECMEM
+
+#ifdef CONFIG_XIP_KERNEL
+/*
+ * The XIP kernel text is mapped in the module area for modules and
+ * some other stuff to work without any indirect relocations.
+ * MODULES_VADDR is redefined here and not in asm/memory.h to avoid
+ * recompiling the whole kernel when CONFIG_XIP_KERNEL is turned on/off.
+ */
+#undef MODULES_VADDR
+#define MODULES_VADDR  (((unsigned long)_exiprom + ~PMD_MASK) & PMD_MASK)
+#endif
+
+#ifdef CONFIG_MMU
+static struct execmem_info execmem_info __ro_after_init;
+
+struct execmem_info __init *execmem_arch_setup(void)
+{
+   unsigned long fallback_start = 0, fallback_end = 0;
+
+   if (IS_ENABLED(CONFIG_ARM_MODULE_PLTS)) {
+   fallback_start = VMALLOC_START;
+   fallback_end = VMALLOC_END;
+   }
+
+   execmem_info = (struct execmem_info){
+   .ranges = {
+   [EXECMEM_DEFAULT] = {
+   .start  = MODULES_VADDR,
+   .end= MODULES_END,
+   .pgprot = PAGE_KERNEL_EXEC,
+   .alignment = 1,
+   .fallback_start = fallback_start,
+   

[PATCH v8 11/17] powerpc: extend execmem_params for kprobes allocations

2024-05-05 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

powerpc overrides kprobes::alloc_insn_page() to remove writable
permissions when STRICT_MODULE_RWX is on.

Add definition of EXECMEM_KPROBES to execmem_params to allow using the
generic kprobes::alloc_insn_page() with the desired permissions.

As powerpc uses breakpoint instructions to inject kprobes, it does not
need to constrain kprobe allocations to the modules area and can use the
entire vmalloc address space.

Signed-off-by: Mike Rapoport (IBM) 
---
 arch/powerpc/kernel/kprobes.c | 20 
 arch/powerpc/kernel/module.c  |  7 +++
 2 files changed, 7 insertions(+), 20 deletions(-)

diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index 9fcd01bb2ce6..14c5ddec3056 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -126,26 +126,6 @@ kprobe_opcode_t *arch_adjust_kprobe_addr(unsigned long 
addr, unsigned long offse
return (kprobe_opcode_t *)(addr + offset);
 }
 
-void *alloc_insn_page(void)
-{
-   void *page;
-
-   page = execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE);
-   if (!page)
-   return NULL;
-
-   if (strict_module_rwx_enabled()) {
-   int err = set_memory_rox((unsigned long)page, 1);
-
-   if (err)
-   goto error;
-   }
-   return page;
-error:
-   execmem_free(page);
-   return NULL;
-}
-
 int arch_prepare_kprobe(struct kprobe *p)
 {
int ret = 0;
diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c
index ac80559015a3..2a23cf7e141b 100644
--- a/arch/powerpc/kernel/module.c
+++ b/arch/powerpc/kernel/module.c
@@ -94,6 +94,7 @@ static struct execmem_info execmem_info __ro_after_init;
 
 struct execmem_info __init *execmem_arch_setup(void)
 {
+   pgprot_t kprobes_prot = strict_module_rwx_enabled() ? PAGE_KERNEL_ROX : 
PAGE_KERNEL_EXEC;
pgprot_t prot = strict_module_rwx_enabled() ? PAGE_KERNEL : 
PAGE_KERNEL_EXEC;
unsigned long fallback_start = 0, fallback_end = 0;
unsigned long start, end;
@@ -132,6 +133,12 @@ struct execmem_info __init *execmem_arch_setup(void)
.fallback_start = fallback_start,
.fallback_end   = fallback_end,
},
+   [EXECMEM_KPROBES] = {
+   .start  = VMALLOC_START,
+   .end= VMALLOC_END,
+   .pgprot = kprobes_prot,
+   .alignment = 1,
+   },
[EXECMEM_MODULE_DATA] = {
.start  = VMALLOC_START,
.end= VMALLOC_END,
-- 
2.43.0



[PATCH v8 10/17] arm64: extend execmem_info for generated code allocations

2024-05-05 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

The memory allocations for kprobes and BPF on arm64 can be placed
anywhere in vmalloc address space and currently this is implemented with
overrides of alloc_insn_page() and bpf_jit_alloc_exec() in arm64.

Define EXECMEM_KPROBES and EXECMEM_BPF ranges in arm64::execmem_info and
drop overrides of alloc_insn_page() and bpf_jit_alloc_exec().

Signed-off-by: Mike Rapoport (IBM) 
Acked-by: Will Deacon 
---
 arch/arm64/kernel/module.c | 12 
 arch/arm64/kernel/probes/kprobes.c |  7 ---
 arch/arm64/net/bpf_jit_comp.c  | 11 ---
 3 files changed, 12 insertions(+), 18 deletions(-)

diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index b7a7a23f9f8f..a52240ea084b 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -146,6 +146,18 @@ struct execmem_info __init *execmem_arch_setup(void)
.fallback_start = fallback_start,
.fallback_end   = fallback_end,
},
+   [EXECMEM_KPROBES] = {
+   .start  = VMALLOC_START,
+   .end= VMALLOC_END,
+   .pgprot = PAGE_KERNEL_ROX,
+   .alignment = 1,
+   },
+   [EXECMEM_BPF] = {
+   .start  = VMALLOC_START,
+   .end= VMALLOC_END,
+   .pgprot = PAGE_KERNEL,
+   .alignment = 1,
+   },
},
};
 
diff --git a/arch/arm64/kernel/probes/kprobes.c 
b/arch/arm64/kernel/probes/kprobes.c
index 327855a11df2..4268678d0e86 100644
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -129,13 +129,6 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
return 0;
 }
 
-void *alloc_insn_page(void)
-{
-   return __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, VMALLOC_END,
-   GFP_KERNEL, PAGE_KERNEL_ROX, VM_FLUSH_RESET_PERMS,
-   NUMA_NO_NODE, __builtin_return_address(0));
-}
-
 /* arm kprobe: install breakpoint in text */
 void __kprobes arch_arm_kprobe(struct kprobe *p)
 {
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 122021f9bdfc..456f5af239fc 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -1793,17 +1793,6 @@ u64 bpf_jit_alloc_exec_limit(void)
return VMALLOC_END - VMALLOC_START;
 }
 
-void *bpf_jit_alloc_exec(unsigned long size)
-{
-   /* Memory is intended to be executable, reset the pointer tag. */
-   return kasan_reset_tag(vmalloc(size));
-}
-
-void bpf_jit_free_exec(void *addr)
-{
-   return vfree(addr);
-}
-
 /* Indicate the JIT backend supports mixing bpf2bpf and tailcalls. */
 bool bpf_jit_supports_subprog_tailcalls(void)
 {
-- 
2.43.0



[PATCH v8 09/17] riscv: extend execmem_params for generated code allocations

2024-05-05 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

The memory allocations for kprobes and BPF on RISC-V are not placed in
the modules area and these custom allocations are implemented with
overrides of alloc_insn_page() and bpf_jit_alloc_exec().

Define MODULES_VADDR and MODULES_END as VMALLOC_START and VMALLOC_END for
32 bit and slightly reorder execmem_params initialization to support both
32 and 64 bit variants, define EXECMEM_KPROBES and EXECMEM_BPF ranges in
riscv::execmem_params and drop overrides of alloc_insn_page() and
bpf_jit_alloc_exec().

Signed-off-by: Mike Rapoport (IBM) 
Reviewed-by: Alexandre Ghiti 
---
 arch/riscv/include/asm/pgtable.h   |  3 +++
 arch/riscv/kernel/module.c | 14 +-
 arch/riscv/kernel/probes/kprobes.c | 10 --
 arch/riscv/net/bpf_jit_core.c  | 13 -
 4 files changed, 16 insertions(+), 24 deletions(-)

diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 9f8ea0e33eb1..5f21814e438e 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -55,6 +55,9 @@
 #define MODULES_LOWEST_VADDR   (KERNEL_LINK_ADDR - SZ_2G)
 #define MODULES_VADDR  (PFN_ALIGN((unsigned long)&_end) - SZ_2G)
 #define MODULES_END(PFN_ALIGN((unsigned long)&_start))
+#else
+#define MODULES_VADDR  VMALLOC_START
+#define MODULES_ENDVMALLOC_END
 #endif
 
 /*
diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c
index 182904127ba0..0e6415f00fca 100644
--- a/arch/riscv/kernel/module.c
+++ b/arch/riscv/kernel/module.c
@@ -906,7 +906,7 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char 
*strtab,
return 0;
 }
 
-#if defined(CONFIG_MMU) && defined(CONFIG_64BIT)
+#ifdef CONFIG_MMU
 static struct execmem_info execmem_info __ro_after_init;
 
 struct execmem_info __init *execmem_arch_setup(void)
@@ -919,6 +919,18 @@ struct execmem_info __init *execmem_arch_setup(void)
.pgprot = PAGE_KERNEL,
.alignment = 1,
},
+   [EXECMEM_KPROBES] = {
+   .start  = VMALLOC_START,
+   .end= VMALLOC_END,
+   .pgprot = PAGE_KERNEL_READ_EXEC,
+   .alignment = 1,
+   },
+   [EXECMEM_BPF] = {
+   .start  = BPF_JIT_REGION_START,
+   .end= BPF_JIT_REGION_END,
+   .pgprot = PAGE_KERNEL,
+   .alignment = PAGE_SIZE,
+   },
},
};
 
diff --git a/arch/riscv/kernel/probes/kprobes.c 
b/arch/riscv/kernel/probes/kprobes.c
index 2f08c14a933d..e64f2f3064eb 100644
--- a/arch/riscv/kernel/probes/kprobes.c
+++ b/arch/riscv/kernel/probes/kprobes.c
@@ -104,16 +104,6 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
return 0;
 }
 
-#ifdef CONFIG_MMU
-void *alloc_insn_page(void)
-{
-   return  __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, VMALLOC_END,
-GFP_KERNEL, PAGE_KERNEL_READ_EXEC,
-VM_FLUSH_RESET_PERMS, NUMA_NO_NODE,
-__builtin_return_address(0));
-}
-#endif
-
 /* install breakpoint in text */
 void __kprobes arch_arm_kprobe(struct kprobe *p)
 {
diff --git a/arch/riscv/net/bpf_jit_core.c b/arch/riscv/net/bpf_jit_core.c
index 6b3acac30c06..e238fdbd5dbc 100644
--- a/arch/riscv/net/bpf_jit_core.c
+++ b/arch/riscv/net/bpf_jit_core.c
@@ -219,19 +219,6 @@ u64 bpf_jit_alloc_exec_limit(void)
return BPF_JIT_REGION_SIZE;
 }
 
-void *bpf_jit_alloc_exec(unsigned long size)
-{
-   return __vmalloc_node_range(size, PAGE_SIZE, BPF_JIT_REGION_START,
-   BPF_JIT_REGION_END, GFP_KERNEL,
-   PAGE_KERNEL, 0, NUMA_NO_NODE,
-   __builtin_return_address(0));
-}
-
-void bpf_jit_free_exec(void *addr)
-{
-   return vfree(addr);
-}
-
 void *bpf_arch_text_copy(void *dst, void *src, size_t len)
 {
int ret;
-- 
2.43.0



[PATCH v8 08/17] mm/execmem, arch: convert remaining overrides of module_alloc to execmem

2024-05-05 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

Extend execmem parameters to accommodate more complex overrides of
module_alloc() by architectures.

This includes specification of a fallback range required by arm, arm64
and powerpc, EXECMEM_MODULE_DATA type required by powerpc, support for
allocation of KASAN shadow required by s390 and x86 and support for
late initialization of execmem required by arm64.

The core implementation of execmem_alloc() takes care of suppressing
warnings when the initial allocation fails but there is a fallback range
defined.

Signed-off-by: Mike Rapoport (IBM) 
Acked-by: Will Deacon 
Acked-by: Song Liu 
Tested-by: Liviu Dudau 
---
 arch/Kconfig |  8 
 arch/arm/kernel/module.c | 41 
 arch/arm64/Kconfig   |  1 +
 arch/arm64/kernel/module.c   | 55 ++
 arch/powerpc/kernel/module.c | 60 +++--
 arch/s390/kernel/module.c| 54 +++---
 arch/x86/kernel/module.c | 70 +++--
 include/linux/execmem.h  | 30 ++-
 include/linux/moduleloader.h | 12 --
 kernel/module/main.c | 26 +++--
 mm/execmem.c | 75 ++--
 11 files changed, 247 insertions(+), 185 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index 65afb1de48b3..4fd0daa54e6c 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -960,6 +960,14 @@ config ARCH_WANTS_MODULES_DATA_IN_VMALLOC
  For architectures like powerpc/32 which have constraints on module
  allocation and need to allocate module data outside of module area.
 
+config ARCH_WANTS_EXECMEM_LATE
+   bool
+   help
+ For architectures that do not allocate executable memory early on
+ boot, but rather require its initialization late when there is
+ enough entropy for module space randomization, for instance
+ arm64.
+
 config HAVE_IRQ_EXIT_ON_IRQ_STACK
bool
help
diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c
index e74d84f58b77..a98fdf6ff26c 100644
--- a/arch/arm/kernel/module.c
+++ b/arch/arm/kernel/module.c
@@ -16,6 +16,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -34,23 +35,31 @@
 #endif
 
 #ifdef CONFIG_MMU
-void *module_alloc(unsigned long size)
+static struct execmem_info execmem_info __ro_after_init;
+
+struct execmem_info __init *execmem_arch_setup(void)
 {
-   gfp_t gfp_mask = GFP_KERNEL;
-   void *p;
-
-   /* Silence the initial allocation */
-   if (IS_ENABLED(CONFIG_ARM_MODULE_PLTS))
-   gfp_mask |= __GFP_NOWARN;
-
-   p = __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
-   gfp_mask, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
-   __builtin_return_address(0));
-   if (!IS_ENABLED(CONFIG_ARM_MODULE_PLTS) || p)
-   return p;
-   return __vmalloc_node_range(size, 1,  VMALLOC_START, VMALLOC_END,
-   GFP_KERNEL, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
-   __builtin_return_address(0));
+   unsigned long fallback_start = 0, fallback_end = 0;
+
+   if (IS_ENABLED(CONFIG_ARM_MODULE_PLTS)) {
+   fallback_start = VMALLOC_START;
+   fallback_end = VMALLOC_END;
+   }
+
+   execmem_info = (struct execmem_info){
+   .ranges = {
+   [EXECMEM_DEFAULT] = {
+   .start  = MODULES_VADDR,
+   .end= MODULES_END,
+   .pgprot = PAGE_KERNEL_EXEC,
+   .alignment = 1,
+   .fallback_start = fallback_start,
+   .fallback_end   = fallback_end,
+   },
+   },
+   };
+
+   return &execmem_info;
 }
 #endif
 
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 7b11c98b3e84..74b34a78b7ac 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -105,6 +105,7 @@ config ARM64
select ARCH_WANT_FRAME_POINTERS
select ARCH_WANT_HUGE_PMD_SHARE if ARM64_4K_PAGES || (ARM64_16K_PAGES 
&& !ARM64_VA_BITS_36)
select ARCH_WANT_LD_ORPHAN_WARN
+   select ARCH_WANTS_EXECMEM_LATE if EXECMEM
select ARCH_WANTS_NO_INSTR
select ARCH_WANTS_THP_SWAP if ARM64_4K_PAGES
select ARCH_HAS_UBSAN
diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index e92da4da1b2a..b7a7a23f9f8f 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -20,6 +20,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -108,41 +109,47 @@ static int __init module_init_limits(void)
 
return 0;
 }
-subsys_initcall(module_init_limits);
 
-void *module_alloc(unsigned long size)
+static struct execmem_info execmem_info __ro_after_init;
+
+struct e

[PATCH v8 07/17] mm/execmem, arch: convert simple overrides of module_alloc to execmem

2024-05-05 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

Several architectures override module_alloc() only to define address
range for code allocations different than VMALLOC address space.

Provide a generic implementation in execmem that uses the parameters for
address space ranges, required alignment and page protections provided
by architectures.

The architectures must fill execmem_info structure and implement
execmem_arch_setup() that returns a pointer to that structure. This way the
execmem initialization won't be called from every architecture, but rather
from a central place, namely a core_initcall() in execmem.

The execmem provides execmem_alloc() API that wraps __vmalloc_node_range()
with the parameters defined by the architectures.  If an architecture does
not implement execmem_arch_setup(), execmem_alloc() will fall back to
module_alloc().

Signed-off-by: Mike Rapoport (IBM) 
Acked-by: Song Liu 
---
 arch/loongarch/kernel/module.c | 19 --
 arch/mips/kernel/module.c  | 20 --
 arch/nios2/kernel/module.c | 21 ---
 arch/parisc/kernel/module.c| 24 
 arch/riscv/kernel/module.c | 24 
 arch/sparc/kernel/module.c | 20 --
 include/linux/execmem.h| 47 
 mm/execmem.c   | 67 --
 mm/mm_init.c   |  2 +
 9 files changed, 210 insertions(+), 34 deletions(-)

diff --git a/arch/loongarch/kernel/module.c b/arch/loongarch/kernel/module.c
index c7d0338d12c1..ca6dd7ea1610 100644
--- a/arch/loongarch/kernel/module.c
+++ b/arch/loongarch/kernel/module.c
@@ -18,6 +18,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -490,10 +491,22 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char 
*strtab,
return 0;
 }
 
-void *module_alloc(unsigned long size)
+static struct execmem_info execmem_info __ro_after_init;
+
+struct execmem_info __init *execmem_arch_setup(void)
 {
-   return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
-   GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE, 
__builtin_return_address(0));
+   execmem_info = (struct execmem_info){
+   .ranges = {
+   [EXECMEM_DEFAULT] = {
+   .start  = MODULES_VADDR,
+   .end= MODULES_END,
+   .pgprot = PAGE_KERNEL,
+   .alignment = 1,
+   },
+   },
+   };
+
+   return &execmem_info;
 }
 
 static void module_init_ftrace_plt(const Elf_Ehdr *hdr,
diff --git a/arch/mips/kernel/module.c b/arch/mips/kernel/module.c
index 9a6c96014904..59225a3cf918 100644
--- a/arch/mips/kernel/module.c
+++ b/arch/mips/kernel/module.c
@@ -20,6 +20,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 struct mips_hi16 {
@@ -32,11 +33,22 @@ static LIST_HEAD(dbe_list);
 static DEFINE_SPINLOCK(dbe_lock);
 
 #ifdef MODULES_VADDR
-void *module_alloc(unsigned long size)
+static struct execmem_info execmem_info __ro_after_init;
+
+struct execmem_info __init *execmem_arch_setup(void)
 {
-   return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
-   GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE,
-   __builtin_return_address(0));
+   execmem_info = (struct execmem_info){
+   .ranges = {
+   [EXECMEM_DEFAULT] = {
+   .start  = MODULES_VADDR,
+   .end= MODULES_END,
+   .pgprot = PAGE_KERNEL,
+   .alignment = 1,
+   },
+   },
+   };
+
+   return &execmem_info;
 }
 #endif
 
diff --git a/arch/nios2/kernel/module.c b/arch/nios2/kernel/module.c
index 9c97b7513853..0d1ee86631fc 100644
--- a/arch/nios2/kernel/module.c
+++ b/arch/nios2/kernel/module.c
@@ -18,15 +18,26 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 
-void *module_alloc(unsigned long size)
+static struct execmem_info execmem_info __ro_after_init;
+
+struct execmem_info __init *execmem_arch_setup(void)
 {
-   return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
-   GFP_KERNEL, PAGE_KERNEL_EXEC,
-   VM_FLUSH_RESET_PERMS, NUMA_NO_NODE,
-   __builtin_return_address(0));
+   execmem_info = (struct execmem_info){
+   .ranges = {
+   [EXECMEM_DEFAULT] = {
+   .start  = MODULES_VADDR,
+   .end= MODULES_END,
+   .pgprot = PAGE_KERNEL_EXEC,
+   .alignment = 1,
+   },
+   },
+   };
+
+   return &execmem_info;
 }
 
 int apply_relocate_add(Elf32_Shdr *sechdrs, const char *strtab,
diff --git a/arch/par

[PATCH v8 06/17] mm: introduce execmem_alloc() and execmem_free()

2024-05-05 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

module_alloc() is used everywhere as a means to allocate memory for code.

Besides being semantically wrong, this unnecessarily ties all subsystems
that need to allocate code, such as ftrace, kprobes and BPF to modules and
puts the burden of code allocation to the modules code.

Several architectures override module_alloc() because of various
constraints where the executable memory can be located and this causes
additional obstacles for improvements of code allocation.

Start splitting code allocation from modules by introducing execmem_alloc()
and execmem_free() APIs.

Initially, execmem_alloc() is a wrapper for module_alloc() and
execmem_free() is a replacement of module_memfree() to allow updating all
call sites to use the new APIs.

Since architectures define different restrictions on placement,
permissions, alignment and other parameters for memory that can be used by
different subsystems that allocate executable memory, execmem_alloc() takes
a type argument, that will be used to identify the calling subsystem and to
allow architectures define parameters for ranges suitable for that
subsystem.

No functional changes.

Signed-off-by: Mike Rapoport (IBM) 
Acked-by: Masami Hiramatsu (Google) 
Acked-by: Song Liu 
---
 arch/powerpc/kernel/kprobes.c|  6 ++--
 arch/s390/kernel/ftrace.c|  4 +--
 arch/s390/kernel/kprobes.c   |  4 +--
 arch/s390/kernel/module.c|  5 +--
 arch/sparc/net/bpf_jit_comp_32.c |  8 ++---
 arch/x86/kernel/ftrace.c |  6 ++--
 arch/x86/kernel/kprobes/core.c   |  4 +--
 include/linux/execmem.h  | 57 
 include/linux/moduleloader.h |  3 --
 kernel/bpf/core.c|  6 ++--
 kernel/kprobes.c |  8 ++---
 kernel/module/Kconfig|  1 +
 kernel/module/main.c | 25 +-
 mm/Kconfig   |  3 ++
 mm/Makefile  |  1 +
 mm/execmem.c | 32 ++
 16 files changed, 128 insertions(+), 45 deletions(-)
 create mode 100644 include/linux/execmem.h
 create mode 100644 mm/execmem.c

diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index bbca90a5e2ec..9fcd01bb2ce6 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -19,8 +19,8 @@
 #include 
 #include 
 #include 
-#include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -130,7 +130,7 @@ void *alloc_insn_page(void)
 {
void *page;
 
-   page = module_alloc(PAGE_SIZE);
+   page = execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE);
if (!page)
return NULL;
 
@@ -142,7 +142,7 @@ void *alloc_insn_page(void)
}
return page;
 error:
-   module_memfree(page);
+   execmem_free(page);
return NULL;
 }
 
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index c46381ea04ec..798249ef5646 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -7,13 +7,13 @@
  *   Author(s): Martin Schwidefsky 
  */
 
-#include 
 #include 
 #include 
 #include 
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -220,7 +220,7 @@ static int __init ftrace_plt_init(void)
 {
const char *start, *end;
 
-   ftrace_plt = module_alloc(PAGE_SIZE);
+   ftrace_plt = execmem_alloc(EXECMEM_FTRACE, PAGE_SIZE);
if (!ftrace_plt)
panic("cannot allocate ftrace plt\n");
 
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index f0cf20d4b3c5..3c1b1be744de 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -9,7 +9,6 @@
 
 #define pr_fmt(fmt) "kprobes: " fmt
 
-#include 
 #include 
 #include 
 #include 
@@ -21,6 +20,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -38,7 +38,7 @@ void *alloc_insn_page(void)
 {
void *page;
 
-   page = module_alloc(PAGE_SIZE);
+   page = execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE);
if (!page)
return NULL;
set_memory_rox((unsigned long)page, 1);
diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c
index 42215f9404af..ac97a905e8cd 100644
--- a/arch/s390/kernel/module.c
+++ b/arch/s390/kernel/module.c
@@ -21,6 +21,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -76,7 +77,7 @@ void *module_alloc(unsigned long size)
 #ifdef CONFIG_FUNCTION_TRACER
 void module_arch_cleanup(struct module *mod)
 {
-   module_memfree(mod->arch.trampolines_start);
+   execmem_free(mod->arch.trampolines_start);
 }
 #endif
 
@@ -510,7 +511,7 @@ static int module_alloc_ftrace_hotpatch_trampolines(struct 
module *me,
 
size = FTRACE_HOTPATCH_TRAMPOLINES_SIZE(s->sh_size);
numpages = DIV_ROUND_UP(size, PAGE_SIZE);
-   start = module_alloc(numpages * PAGE_SIZE);
+   start = execmem_alloc(EXECMEM_FTRACE, nu

[PATCH v8 05/17] module: make module_memory_{alloc,free} more self-contained

2024-05-05 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

Move the logic related to the memory allocation and freeing into
module_memory_alloc() and module_memory_free().

Signed-off-by: Mike Rapoport (IBM) 
Reviewed-by: Philippe Mathieu-Daudé 
---
 kernel/module/main.c | 64 +++-
 1 file changed, 39 insertions(+), 25 deletions(-)

diff --git a/kernel/module/main.c b/kernel/module/main.c
index e1e8a7a9d6c1..5b82b069e0d3 100644
--- a/kernel/module/main.c
+++ b/kernel/module/main.c
@@ -1203,15 +1203,44 @@ static bool mod_mem_use_vmalloc(enum mod_mem_type type)
mod_mem_type_is_core_data(type);
 }
 
-static void *module_memory_alloc(unsigned int size, enum mod_mem_type type)
+static int module_memory_alloc(struct module *mod, enum mod_mem_type type)
 {
+   unsigned int size = PAGE_ALIGN(mod->mem[type].size);
+   void *ptr;
+
+   mod->mem[type].size = size;
+
if (mod_mem_use_vmalloc(type))
-   return vzalloc(size);
-   return module_alloc(size);
+   ptr = vmalloc(size);
+   else
+   ptr = module_alloc(size);
+
+   if (!ptr)
+   return -ENOMEM;
+
+   /*
+* The pointer to these blocks of memory are stored on the module
+* structure and we keep that around so long as the module is
+* around. We only free that memory when we unload the module.
+* Just mark them as not being a leak then. The .init* ELF
+* sections *do* get freed after boot so we *could* treat them
+* slightly differently with kmemleak_ignore() and only grey
+* them out as they work as typical memory allocations which
+* *do* eventually get freed, but let's just keep things simple
+* and avoid *any* false positives.
+*/
+   kmemleak_not_leak(ptr);
+
+   memset(ptr, 0, size);
+   mod->mem[type].base = ptr;
+
+   return 0;
 }
 
-static void module_memory_free(void *ptr, enum mod_mem_type type)
+static void module_memory_free(struct module *mod, enum mod_mem_type type)
 {
+   void *ptr = mod->mem[type].base;
+
if (mod_mem_use_vmalloc(type))
vfree(ptr);
else
@@ -1229,12 +1258,12 @@ static void free_mod_mem(struct module *mod)
/* Free lock-classes; relies on the preceding sync_rcu(). */
lockdep_free_key_range(mod_mem->base, mod_mem->size);
if (mod_mem->size)
-   module_memory_free(mod_mem->base, type);
+   module_memory_free(mod, type);
}
 
/* MOD_DATA hosts mod, so free it at last */
lockdep_free_key_range(mod->mem[MOD_DATA].base, 
mod->mem[MOD_DATA].size);
-   module_memory_free(mod->mem[MOD_DATA].base, MOD_DATA);
+   module_memory_free(mod, MOD_DATA);
 }
 
 /* Free a module, remove from lists, etc. */
@@ -2225,7 +2254,6 @@ static int find_module_sections(struct module *mod, 
struct load_info *info)
 static int move_module(struct module *mod, struct load_info *info)
 {
int i;
-   void *ptr;
enum mod_mem_type t = 0;
int ret = -ENOMEM;
 
@@ -2234,26 +2262,12 @@ static int move_module(struct module *mod, struct 
load_info *info)
mod->mem[type].base = NULL;
continue;
}
-   mod->mem[type].size = PAGE_ALIGN(mod->mem[type].size);
-   ptr = module_memory_alloc(mod->mem[type].size, type);
-   /*
- * The pointer to these blocks of memory are stored on the 
module
- * structure and we keep that around so long as the module is
- * around. We only free that memory when we unload the module.
- * Just mark them as not being a leak then. The .init* ELF
- * sections *do* get freed after boot so we *could* treat them
- * slightly differently with kmemleak_ignore() and only grey
- * them out as they work as typical memory allocations which
- * *do* eventually get freed, but let's just keep things simple
- * and avoid *any* false positives.
-*/
-   kmemleak_not_leak(ptr);
-   if (!ptr) {
+
+   ret = module_memory_alloc(mod, type);
+   if (ret) {
t = type;
goto out_enomem;
}
-   memset(ptr, 0, mod->mem[type].size);
-   mod->mem[type].base = ptr;
}
 
/* Transfer each section which specifies SHF_ALLOC */
@@ -2296,7 +2310,7 @@ static int move_module(struct module *mod, struct 
load_info *info)
return 0;
 out_enomem:
for (t--; t >= 0; t--)
-   module_memory_free(mod->mem[t].base, t);
+   module_memory_free(mod, t);
return ret;
 }
 
-- 
2.43.0



[PATCH v8 04/17] sparc: simplify module_alloc()

2024-05-05 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

Define MODULES_VADDR and MODULES_END as VMALLOC_START and VMALLOC_END
for 32-bit and reduce module_alloc() to

__vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, ...)

as with the new defines the allocations becomes identical for both 32
and 64 bits.

While on it, drop unused include of <linux/jump_label.h>

Suggested-by: Sam Ravnborg 
Signed-off-by: Mike Rapoport (IBM) 
Reviewed-by: Sam Ravnborg 
---
 arch/sparc/include/asm/pgtable_32.h |  2 ++
 arch/sparc/kernel/module.c  | 25 +
 2 files changed, 3 insertions(+), 24 deletions(-)

diff --git a/arch/sparc/include/asm/pgtable_32.h 
b/arch/sparc/include/asm/pgtable_32.h
index 9e85d57ac3f2..62bcafe38b1f 100644
--- a/arch/sparc/include/asm/pgtable_32.h
+++ b/arch/sparc/include/asm/pgtable_32.h
@@ -432,6 +432,8 @@ static inline int io_remap_pfn_range(struct vm_area_struct 
*vma,
 
 #define VMALLOC_START   _AC(0xfe600000,UL)
 #define VMALLOC_END     _AC(0xffc00000,UL)
+#define MODULES_VADDR   VMALLOC_START
+#define MODULES_END VMALLOC_END
 
 /* We provide our own get_unmapped_area to cope with VA holes for userland */
 #define HAVE_ARCH_UNMAPPED_AREA
diff --git a/arch/sparc/kernel/module.c b/arch/sparc/kernel/module.c
index 66c45a2764bc..d37adb2a0b54 100644
--- a/arch/sparc/kernel/module.c
+++ b/arch/sparc/kernel/module.c
@@ -21,35 +21,12 @@
 
 #include "entry.h"
 
-#ifdef CONFIG_SPARC64
-
-#include 
-
-static void *module_map(unsigned long size)
+void *module_alloc(unsigned long size)
 {
-   if (PAGE_ALIGN(size) > MODULES_LEN)
-   return NULL;
return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE,
__builtin_return_address(0));
 }
-#else
-static void *module_map(unsigned long size)
-{
-   return vmalloc(size);
-}
-#endif /* CONFIG_SPARC64 */
-
-void *module_alloc(unsigned long size)
-{
-   void *ret;
-
-   ret = module_map(size);
-   if (ret)
-   memset(ret, 0, size);
-
-   return ret;
-}
 
 /* Make generic code ignore STT_REGISTER dummy undefined symbols.  */
 int module_frob_arch_sections(Elf_Ehdr *hdr,
-- 
2.43.0



[PATCH v8 03/17] nios2: define virtual address space for modules

2024-05-05 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

nios2 uses kmalloc() to implement module_alloc() because CALL26/PCREL26
cannot reach all of vmalloc address space.

Define module space as 32MiB below the kernel base and switch nios2 to
use vmalloc for module allocations.

Suggested-by: Thomas Gleixner 
Acked-by: Dinh Nguyen 
Acked-by: Song Liu 
Signed-off-by: Mike Rapoport (IBM) 
---
 arch/nios2/include/asm/pgtable.h |  5 -
 arch/nios2/kernel/module.c   | 19 ---
 2 files changed, 8 insertions(+), 16 deletions(-)

diff --git a/arch/nios2/include/asm/pgtable.h b/arch/nios2/include/asm/pgtable.h
index d052dfcbe8d3..eab87c6beacb 100644
--- a/arch/nios2/include/asm/pgtable.h
+++ b/arch/nios2/include/asm/pgtable.h
@@ -25,7 +25,10 @@
 #include 
 
 #define VMALLOC_START  CONFIG_NIOS2_KERNEL_MMU_REGION_BASE
-#define VMALLOC_END    (CONFIG_NIOS2_KERNEL_REGION_BASE - 1)
+#define VMALLOC_END    (CONFIG_NIOS2_KERNEL_REGION_BASE - SZ_32M - 1)
+
+#define MODULES_VADDR  (CONFIG_NIOS2_KERNEL_REGION_BASE - SZ_32M)
+#define MODULES_END    (CONFIG_NIOS2_KERNEL_REGION_BASE - 1)
 
 struct mm_struct;
 
diff --git a/arch/nios2/kernel/module.c b/arch/nios2/kernel/module.c
index 76e0a42d6e36..9c97b7513853 100644
--- a/arch/nios2/kernel/module.c
+++ b/arch/nios2/kernel/module.c
@@ -21,23 +21,12 @@
 
 #include 
 
-/*
- * Modules should NOT be allocated with kmalloc for (obvious) reasons.
- * But we do it for now to avoid relocation issues. CALL26/PCREL26 cannot reach
- * from 0x8000 (vmalloc area) to 0xc (kernel) (kmalloc returns
- * addresses in 0xc000)
- */
 void *module_alloc(unsigned long size)
 {
-   if (size == 0)
-   return NULL;
-   return kmalloc(size, GFP_KERNEL);
-}
-
-/* Free memory returned from module_alloc */
-void module_memfree(void *module_region)
-{
-   kfree(module_region);
+   return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
+   GFP_KERNEL, PAGE_KERNEL_EXEC,
+   VM_FLUSH_RESET_PERMS, NUMA_NO_NODE,
+   __builtin_return_address(0));
 }
 
 int apply_relocate_add(Elf32_Shdr *sechdrs, const char *strtab,
-- 
2.43.0



[PATCH v8 02/17] mips: module: rename MODULE_START to MODULES_VADDR

2024-05-05 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

and MODULE_END to MODULES_END to match other architectures that define
custom address space for modules.

Signed-off-by: Mike Rapoport (IBM) 
---
 arch/mips/include/asm/pgtable-64.h | 4 ++--
 arch/mips/kernel/module.c  | 4 ++--
 arch/mips/mm/fault.c   | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/mips/include/asm/pgtable-64.h 
b/arch/mips/include/asm/pgtable-64.h
index 20ca48c1b606..c0109aff223b 100644
--- a/arch/mips/include/asm/pgtable-64.h
+++ b/arch/mips/include/asm/pgtable-64.h
@@ -147,8 +147,8 @@
 #if defined(CONFIG_MODULES) && defined(KBUILD_64BIT_SYM32) && \
VMALLOC_START != CKSSEG
 /* Load modules into 32bit-compatible segment. */
-#define MODULE_START   CKSSEG
-#define MODULE_END     (FIXADDR_START-2*PAGE_SIZE)
+#define MODULES_VADDR  CKSSEG
+#define MODULES_END    (FIXADDR_START-2*PAGE_SIZE)
 #endif
 
 #define pte_ERROR(e) \
diff --git a/arch/mips/kernel/module.c b/arch/mips/kernel/module.c
index 7b2fbaa9cac5..9a6c96014904 100644
--- a/arch/mips/kernel/module.c
+++ b/arch/mips/kernel/module.c
@@ -31,10 +31,10 @@ struct mips_hi16 {
 static LIST_HEAD(dbe_list);
 static DEFINE_SPINLOCK(dbe_lock);
 
-#ifdef MODULE_START
+#ifdef MODULES_VADDR
 void *module_alloc(unsigned long size)
 {
-   return __vmalloc_node_range(size, 1, MODULE_START, MODULE_END,
+   return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE,
__builtin_return_address(0));
 }
diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c
index aaa9a242ebba..37fedeaca2e9 100644
--- a/arch/mips/mm/fault.c
+++ b/arch/mips/mm/fault.c
@@ -83,8 +83,8 @@ static void __do_page_fault(struct pt_regs *regs, unsigned 
long write,
 
if (unlikely(address >= VMALLOC_START && address <= VMALLOC_END))
goto VMALLOC_FAULT_TARGET;
-#ifdef MODULE_START
-   if (unlikely(address >= MODULE_START && address < MODULE_END))
+#ifdef MODULES_VADDR
+   if (unlikely(address >= MODULES_VADDR && address < MODULES_END))
goto VMALLOC_FAULT_TARGET;
 #endif
 
-- 
2.43.0



[PATCH v8 01/17] arm64: module: remove unneeded call to kasan_alloc_module_shadow()

2024-05-05 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

Since commit f6f37d9320a1 ("arm64: select KASAN_VMALLOC for SW/HW_TAGS
modes") KASAN_VMALLOC is always enabled when KASAN is on. This means
that allocations in module_alloc() will be tracked by KASAN protection
for vmalloc() and that kasan_alloc_module_shadow() will be always an
empty inline and there is no point in calling it.

Drop meaningless call to kasan_alloc_module_shadow() from
module_alloc().

Signed-off-by: Mike Rapoport (IBM) 
---
 arch/arm64/kernel/module.c | 5 -
 1 file changed, 5 deletions(-)

diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index 47e0be610bb6..e92da4da1b2a 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -141,11 +141,6 @@ void *module_alloc(unsigned long size)
__func__);
}
 
-   if (p && (kasan_alloc_module_shadow(p, size, GFP_KERNEL) < 0)) {
-   vfree(p);
-   return NULL;
-   }
-
/* Memory is intended to be executable, reset the pointer tag. */
return kasan_reset_tag(p);
 }
-- 
2.43.0



[PATCH v8 00/17] mm: jit/text allocator

2024-05-05 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

Hi,

The patches are also available in git:
https://git.kernel.org/pub/scm/linux/kernel/git/rppt/linux.git/log/?h=execmem/v8

v8:
* fix initialization of default_execmem_info

v7: https://lore.kernel.org/all/20240429121620.1186447-1-r...@kernel.org
* define MODULES_{VADDR,END} for riscv32 to fix the build and avoid
  #ifdefs in a function body
* add Acks, thanks everybody

v6: https://lore.kernel.org/all/20240426082854.7355-1-r...@kernel.org
* restore patch "arm64: extend execmem_info for generated code
  allocations" that disappeared in v5 rebase
* update execmem initialization so that by default it will be
  initialized early while late initialization will be an opt-in

v5: https://lore.kernel.org/all/20240422094436.3625171-1-r...@kernel.org
* rebase on v6.9-rc4 to avoid a conflict in kprobes
* add copyrights to mm/execmem.c (Luis)
* fix spelling (Ingo)
* define MODULES_VADDR for sparc (Sam)
* consistently initialize struct execmem_info (Peter)
* reduce #ifdefs in function bodies in kprobes (Masami) 

v4: https://lore.kernel.org/all/20240411160051.2093261-1-r...@kernel.org
* rebase on v6.9-rc2
* rename execmem_params to execmem_info and execmem_arch_params() to
  execmem_arch_setup()
* use single execmem_alloc() API instead of execmem_{text,data}_alloc() (Song)
* avoid extra copy of execmem parameters (Rick)
* run execmem_init() as core_initcall() except for the architectures that
  may allocate text really early (currently only x86) (Will)
* add acks for some of arm64 and riscv changes, thanks Will and Alexandre
* new commits:
  - drop call to kasan_alloc_module_shadow() on arm64 because it's not
needed anymore
  - rename MODULE_START to MODULES_VADDR on MIPS
  - use CONFIG_EXECMEM instead of CONFIG_MODULES on powerpc as per Christophe:
https://lore.kernel.org/all/79062fa3-3402-47b3-8920-9231ad05e...@csgroup.eu/

v3: https://lore.kernel.org/all/20230918072955.2507221-1-r...@kernel.org
* add type parameter to execmem allocation APIs
* remove BPF dependency on modules

v2: https://lore.kernel.org/all/20230616085038.4121892-1-r...@kernel.org
* Separate "module" and "others" allocations with execmem_text_alloc()
and jit_text_alloc()
* Drop ROX entailment on x86
* Add ack for nios2 changes, thanks Dinh Nguyen

v1: https://lore.kernel.org/all/20230601101257.530867-1-r...@kernel.org

= Cover letter from v1 (slightly updated) =

module_alloc() is used everywhere as a means to allocate memory for code.

Besides being semantically wrong, this unnecessarily ties all subsystems
that need to allocate code, such as ftrace, kprobes and BPF to modules and
puts the burden of code allocation to the modules code.

Several architectures override module_alloc() because of various
constraints where the executable memory can be located and this causes
additional obstacles for improvements of code allocation.

A centralized infrastructure for code allocation allows allocations of
executable memory as ROX, and future optimizations such as caching large
pages for better iTLB performance and providing sub-page allocations for
users that only need small jit code snippets.

Rick Edgecombe proposed perm_alloc extension to vmalloc [1] and Song Liu
proposed execmem_alloc [2], but both these approaches were targeting BPF
allocations and lacked the ground work to abstract executable allocations
and split them from the modules core.

Thomas Gleixner suggested to express module allocation restrictions and
requirements as struct mod_alloc_type_params [3] that would define ranges,
protections and other parameters for different types of allocations used by
modules and following that suggestion Song separated allocations of
different types in modules (commit ac3b43283923 ("module: replace
module_layout with module_memory")) and posted "Type aware module
allocator" set [4].

I liked the idea of parametrising code allocation requirements as a
structure, but I believe the original proposal and Song's module allocator
was too module centric, so I came up with these patches.

This set splits code allocation from modules by introducing execmem_alloc()
and execmem_free() APIs, replaces call sites of module_alloc() and
module_memfree() with the new APIs and implements core text and related
allocations in a central place.

Instead of architecture specific overrides for module_alloc(), the
architectures that require non-default behaviour for text allocation must
fill execmem_info structure and implement execmem_arch_setup() that returns
a pointer to that structure. If an architecture does not implement
execmem_arch_setup(), the defaults compatible with the current
modules::module_alloc() are used.

Since architectures define different restrictions on placement,
permissions, alignment and other parameters for memory that can be used by
different subsystems that allocate executable memory, execmem APIs
take a type argument, that will be used to identif

Re: [PATCH v7 00/16] mm: jit/text allocator

2024-05-03 Thread Mike Rapoport
On Fri, May 03, 2024 at 01:23:30AM +0100, Liviu Dudau wrote:
> On Thu, May 02, 2024 at 04:07:05PM -0700, Luis Chamberlain wrote:
> > On Thu, May 02, 2024 at 11:50:36PM +0100, Liviu Dudau wrote:
> > > On Mon, Apr 29, 2024 at 09:29:20AM -0700, Luis Chamberlain wrote:
> > > > On Mon, Apr 29, 2024 at 03:16:04PM +0300, Mike Rapoport wrote:
> > > > > From: "Mike Rapoport (IBM)" 
> > > > > 
> > > > > Hi,
> > > > > 
> > > > > The patches are also available in git:
> > > > > https://git.kernel.org/pub/scm/linux/kernel/git/rppt/linux.git/log/?h=execmem/v7
> > > > > 
> > > > > v7 changes:
> > > > > * define MODULE_{VADDR,END} for riscv32 to fix the build and avoid
> > > > >   #ifdefs in a function body
> > > > > * add Acks, thanks everybody
> > > > 
> > > > Thanks, I've pushed this to modules-next for further exposure / testing.
> > > > Given the status of testing so far with prior revisions, in that only a
> > > > few issues were found and that those were fixed, and the status of
> > > > reviews, this just might be ripe for v6.10.
> > > 
> > > Looks like there is still some work needed. I've picked up next-20240501
> > > and on arch/mips with CONFIG_MODULE_COMPRESS_XZ=y and 
> > > CONFIG_MODULE_DECOMPRESS=y
> > > I fail to load any module:
> > > 
> > > # modprobe rfkill
> > > [11746.539090] Invalid ELF header magic: != ELF
> > > [11746.587149] execmem: unable to allocate memory
> > > modprobe: can't load module rfkill (kernel/net/rfkill/rfkill.ko.xz): Out 
> > > of memory
> > > 
> > > The (hopefully) relevant parts of my .config:
> > 
> > Thanks for the report! Any chance we can get you to try a bisection? I
> > think it should take 2-3 test boots. To help reduce scope you try 
> > modules-next:
> > 
> > https://git.kernel.org/pub/scm/linux/kernel/git/mcgrof/linux.git/log/?h=modules-next
> > 
> > Then can you check by resetting your tree to commit 3fbe6c2f820a76 ("mm:
> > introduce execmem_alloc() and execmem_free()"). I suspect that should
> > boot, so your bad commit would be the tip 3c2c250cb3a5fbb ("bpf: remove
> > CONFIG_BPF_JIT dependency on CONFIG_MODULES of").
> > 
> > That gives us only a few commits to bisect:
> > 
> > git log --oneline 3fbe6c2f820a76bc36d5546bda85832f57c8fce2..
> > 3c2c250cb3a5 (HEAD -> modules-next, korg/modules-next) bpf: remove 
> > CONFIG_BPF_JIT dependency on CONFIG_MODULES of
> > 11e8e65cce5c kprobes: remove dependency on CONFIG_MODULES
> > e10cbc38697b powerpc: use CONFIG_EXECMEM instead of CONFIG_MODULES where 
> > appropriate
> > 4da3d38f24c5 x86/ftrace: enable dynamic ftrace without CONFIG_MODULES
> > 13ae3d74ee70 arch: make execmem setup available regardless of CONFIG_MODULES
> > 460bbbc70a47 powerpc: extend execmem_params for kprobes allocations
> > e1a14069b5b4 arm64: extend execmem_info for generated code allocations
> > 971e181c6585 riscv: extend execmem_params for generated code allocations
> > 0fa276f26721 mm/execmem, arch: convert remaining overrides of module_alloc 
> > to execmem
> > 022cef244287 mm/execmem, arch: convert simple overrides of module_alloc to 
> > execmem
> > 
> > With 2-3 boots we should be able to tell which is the bad commit.
> 
> Looks like 0fa276f26721 is the first bad commit.
> 
> $ git bisect log
> # bad: [3c2c250cb3a5fbbccc4a4ff4c9354c54af91f02c] bpf: remove CONFIG_BPF_JIT 
> dependency on CONFIG_MODULES of
> # good: [3fbe6c2f820a76bc36d5546bda85832f57c8fce2] mm: introduce 
> execmem_alloc() and execmem_free()
> git bisect start '3c2c250cb3a5' '3fbe6c2f820a76'
> # bad: [460bbbc70a47e929b1936ca68979f3b79f168fc6] powerpc: extend 
> execmem_params for kprobes allocations
> git bisect bad 460bbbc70a47e929b1936ca68979f3b79f168fc6
> # bad: [0fa276f26721e0ffc2ae9c7cf67dcc005b43c67e] mm/execmem, arch: convert 
> remaining overrides of module_alloc to execmem
> git bisect bad 0fa276f26721e0ffc2ae9c7cf67dcc005b43c67e
> # good: [022cef2442870db738a366d3b7a636040c081859] mm/execmem, arch: convert 
> simple overrides of module_alloc to execmem
> git bisect good 022cef2442870db738a366d3b7a636040c081859
> # first bad commit: [0fa276f26721e0ffc2ae9c7cf67dcc005b43c67e] mm/execmem, 
> arch: convert remaining overrides of module_alloc to execmem
> 
> Maybe MIPS also needs a ARCH_WANTS_EXECMEM_LATE?

I don't think so. It rather seems there's a bug in the initialization of
the defaults in execmem. This should fix it:

diff --git a/mm/execmem.c b/mm/execmem.c
index f6dc3fabc1ca..0c4b36bc6d10 100644
--- a/mm/execmem.c
+++ b/mm/execmem.c
@@ -118,7 +118,6 @@ static void __init __execmem_init(void)
info->ranges[EXECMEM_DEFAULT].end = VMALLOC_END;
info->ranges[EXECMEM_DEFAULT].pgprot = PAGE_KERNEL_EXEC;
info->ranges[EXECMEM_DEFAULT].alignment = 1;
-   return;
}
 
if (!execmem_validate(info))
 
> Best regards,
> Liviu

-- 
Sincerely yours,
Mike.


[PATCH v7 16/16] bpf: remove CONFIG_BPF_JIT dependency on CONFIG_MODULES of

2024-04-29 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

BPF just-in-time compiler depended on CONFIG_MODULES because it used
module_alloc() to allocate memory for the generated code.

Since code allocations are now implemented with execmem, drop dependency of
CONFIG_BPF_JIT on CONFIG_MODULES and make it select CONFIG_EXECMEM.

Suggested-by: Björn Töpel 
Signed-off-by: Mike Rapoport (IBM) 
---
 kernel/bpf/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/bpf/Kconfig b/kernel/bpf/Kconfig
index bc25f5098a25..f999e4e0b344 100644
--- a/kernel/bpf/Kconfig
+++ b/kernel/bpf/Kconfig
@@ -43,7 +43,7 @@ config BPF_JIT
bool "Enable BPF Just In Time compiler"
depends on BPF
depends on HAVE_CBPF_JIT || HAVE_EBPF_JIT
-   depends on MODULES
+   select EXECMEM
help
  BPF programs are normally handled by a BPF interpreter. This option
  allows the kernel to generate native code when a program is loaded
-- 
2.43.0



[PATCH v7 15/16] kprobes: remove dependency on CONFIG_MODULES

2024-04-29 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

kprobes depended on CONFIG_MODULES because it has to allocate memory for
code.

Since code allocations are now implemented with execmem, kprobes can be
enabled in non-modular kernels.

Add #ifdef CONFIG_MODULES guards for the code dealing with kprobes inside
modules, make CONFIG_KPROBES select CONFIG_EXECMEM and drop the
dependency of CONFIG_KPROBES on CONFIG_MODULES.

Signed-off-by: Mike Rapoport (IBM) 
Acked-by: Masami Hiramatsu (Google) 
---
 arch/Kconfig|  2 +-
 include/linux/module.h  |  9 ++
 kernel/kprobes.c| 55 +++--
 kernel/trace/trace_kprobe.c | 20 +-
 4 files changed, 63 insertions(+), 23 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index 4fd0daa54e6c..caa459964f09 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -52,9 +52,9 @@ config GENERIC_ENTRY
 
 config KPROBES
bool "Kprobes"
-   depends on MODULES
depends on HAVE_KPROBES
select KALLSYMS
+   select EXECMEM
select TASKS_RCU if PREEMPTION
help
  Kprobes allows you to trap at almost any kernel address and
diff --git a/include/linux/module.h b/include/linux/module.h
index 1153b0d99a80..ffa1c603163c 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -605,6 +605,11 @@ static inline bool module_is_live(struct module *mod)
return mod->state != MODULE_STATE_GOING;
 }
 
+static inline bool module_is_coming(struct module *mod)
+{
+return mod->state == MODULE_STATE_COMING;
+}
+
 struct module *__module_text_address(unsigned long addr);
 struct module *__module_address(unsigned long addr);
 bool is_module_address(unsigned long addr);
@@ -857,6 +862,10 @@ void *dereference_module_function_descriptor(struct module 
*mod, void *ptr)
return ptr;
 }
 
+static inline bool module_is_coming(struct module *mod)
+{
+   return false;
+}
 #endif /* CONFIG_MODULES */
 
 #ifdef CONFIG_SYSFS
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index ddd7cdc16edf..ca2c6cbd42d2 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1588,7 +1588,7 @@ static int check_kprobe_address_safe(struct kprobe *p,
}
 
/* Get module refcount and reject __init functions for loaded modules. 
*/
-   if (*probed_mod) {
+   if (IS_ENABLED(CONFIG_MODULES) && *probed_mod) {
/*
 * We must hold a refcount of the probed module while updating
 * its code to prohibit unexpected unloading.
@@ -1603,12 +1603,13 @@ static int check_kprobe_address_safe(struct kprobe *p,
 * kprobes in there.
 */
if (within_module_init((unsigned long)p->addr, *probed_mod) &&
-   (*probed_mod)->state != MODULE_STATE_COMING) {
+   !module_is_coming(*probed_mod)) {
module_put(*probed_mod);
*probed_mod = NULL;
ret = -ENOENT;
}
}
+
 out:
preempt_enable();
jump_label_unlock();
@@ -2488,24 +2489,6 @@ int kprobe_add_area_blacklist(unsigned long start, 
unsigned long end)
return 0;
 }
 
-/* Remove all symbols in given area from kprobe blacklist */
-static void kprobe_remove_area_blacklist(unsigned long start, unsigned long 
end)
-{
-   struct kprobe_blacklist_entry *ent, *n;
-
-   list_for_each_entry_safe(ent, n, _blacklist, list) {
-   if (ent->start_addr < start || ent->start_addr >= end)
-   continue;
-   list_del(>list);
-   kfree(ent);
-   }
-}
-
-static void kprobe_remove_ksym_blacklist(unsigned long entry)
-{
-   kprobe_remove_area_blacklist(entry, entry + 1);
-}
-
 int __weak arch_kprobe_get_kallsym(unsigned int *symnum, unsigned long *value,
   char *type, char *sym)
 {
@@ -2570,6 +2553,25 @@ static int __init populate_kprobe_blacklist(unsigned 
long *start,
return ret ? : arch_populate_kprobe_blacklist();
 }
 
+#ifdef CONFIG_MODULES
+/* Remove all symbols in given area from kprobe blacklist */
+static void kprobe_remove_area_blacklist(unsigned long start, unsigned long 
end)
+{
+   struct kprobe_blacklist_entry *ent, *n;
+
+   list_for_each_entry_safe(ent, n, _blacklist, list) {
+   if (ent->start_addr < start || ent->start_addr >= end)
+   continue;
+   list_del(>list);
+   kfree(ent);
+   }
+}
+
+static void kprobe_remove_ksym_blacklist(unsigned long entry)
+{
+   kprobe_remove_area_blacklist(entry, entry + 1);
+}
+
 static void add_module_kprobe_blacklist(struct module *mod)
 {
unsigned long start, end;
@@ -2672,6 +2674,17 @@ static struct notifier_block kprobe_module_nb = {
.priority = 0
 };
 
+static int kprobe_register_module_notifier(void)

[PATCH v7 14/16] powerpc: use CONFIG_EXECMEM instead of CONFIG_MODULES where appropriate

2024-04-29 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

There are places where CONFIG_MODULES guards the code that depends on
memory allocation being done with module_alloc().

Replace CONFIG_MODULES with CONFIG_EXECMEM in such places.

Signed-off-by: Mike Rapoport (IBM) 
---
 arch/powerpc/Kconfig | 2 +-
 arch/powerpc/include/asm/kasan.h | 2 +-
 arch/powerpc/kernel/head_8xx.S   | 4 ++--
 arch/powerpc/kernel/head_book3s_32.S | 6 +++---
 arch/powerpc/lib/code-patching.c | 2 +-
 arch/powerpc/mm/book3s32/mmu.c   | 2 +-
 6 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 1c4be3373686..2e586733a464 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -285,7 +285,7 @@ config PPC
select IOMMU_HELPER if PPC64
select IRQ_DOMAIN
select IRQ_FORCED_THREADING
-   select KASAN_VMALLOCif KASAN && MODULES
+   select KASAN_VMALLOCif KASAN && EXECMEM
select LOCK_MM_AND_FIND_VMA
select MMU_GATHER_PAGE_SIZE
select MMU_GATHER_RCU_TABLE_FREE
diff --git a/arch/powerpc/include/asm/kasan.h b/arch/powerpc/include/asm/kasan.h
index 365d2720097c..b5bbb94c51f6 100644
--- a/arch/powerpc/include/asm/kasan.h
+++ b/arch/powerpc/include/asm/kasan.h
@@ -19,7 +19,7 @@
 
 #define KASAN_SHADOW_SCALE_SHIFT   3
 
-#if defined(CONFIG_MODULES) && defined(CONFIG_PPC32)
+#if defined(CONFIG_EXECMEM) && defined(CONFIG_PPC32)
 #define KASAN_KERN_START   ALIGN_DOWN(PAGE_OFFSET - SZ_256M, SZ_256M)
 #else
 #define KASAN_KERN_START   PAGE_OFFSET
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 647b0b445e89..edc479a7c2bc 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -199,12 +199,12 @@ instruction_counter:
mfspr   r10, SPRN_SRR0  /* Get effective address of fault */
INVALIDATE_ADJACENT_PAGES_CPU15(r10, r11)
mtspr   SPRN_MD_EPN, r10
-#ifdef CONFIG_MODULES
+#ifdef CONFIG_EXECMEM
mfcrr11
compare_to_kernel_boundary r10, r10
 #endif
mfspr   r10, SPRN_M_TWB /* Get level 1 table */
-#ifdef CONFIG_MODULES
+#ifdef CONFIG_EXECMEM
blt+3f
rlwinm  r10, r10, 0, 20, 31
orisr10, r10, (swapper_pg_dir - PAGE_OFFSET)@ha
diff --git a/arch/powerpc/kernel/head_book3s_32.S 
b/arch/powerpc/kernel/head_book3s_32.S
index c1d89764dd22..57196883a00e 100644
--- a/arch/powerpc/kernel/head_book3s_32.S
+++ b/arch/powerpc/kernel/head_book3s_32.S
@@ -419,14 +419,14 @@ InstructionTLBMiss:
  */
/* Get PTE (linux-style) and check access */
mfspr   r3,SPRN_IMISS
-#ifdef CONFIG_MODULES
+#ifdef CONFIG_EXECMEM
lis r1, TASK_SIZE@h /* check if kernel address */
cmplw   0,r1,r3
 #endif
mfspr   r2, SPRN_SDR1
li  r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC
rlwinm  r2, r2, 28, 0xf000
-#ifdef CONFIG_MODULES
+#ifdef CONFIG_EXECMEM
li  r0, 3
bgt-112f
lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha   /* if kernel address, 
use */
@@ -442,7 +442,7 @@ InstructionTLBMiss:
andc.   r1,r1,r2/* check access & ~permission */
bne-InstructionAddressInvalid /* return if access not permitted */
/* Convert linux-style PTE to low word of PPC-style PTE */
-#ifdef CONFIG_MODULES
+#ifdef CONFIG_EXECMEM
rlwimi  r2, r0, 0, 31, 31   /* userspace ? -> PP lsb */
 #endif
ori r1, r1, 0xe06   /* clear out reserved bits */
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index c6ab46156cda..7af791446ddf 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -225,7 +225,7 @@ void __init poking_init(void)
 
 static unsigned long get_patch_pfn(void *addr)
 {
-   if (IS_ENABLED(CONFIG_MODULES) && is_vmalloc_or_module_addr(addr))
+   if (IS_ENABLED(CONFIG_EXECMEM) && is_vmalloc_or_module_addr(addr))
return vmalloc_to_pfn(addr);
else
return __pa_symbol(addr) >> PAGE_SHIFT;
diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c
index 100f999871bc..625fe7d08e06 100644
--- a/arch/powerpc/mm/book3s32/mmu.c
+++ b/arch/powerpc/mm/book3s32/mmu.c
@@ -184,7 +184,7 @@ unsigned long __init mmu_mapin_ram(unsigned long base, 
unsigned long top)
 
 static bool is_module_segment(unsigned long addr)
 {
-   if (!IS_ENABLED(CONFIG_MODULES))
+   if (!IS_ENABLED(CONFIG_EXECMEM))
return false;
if (addr < ALIGN_DOWN(MODULES_VADDR, SZ_256M))
return false;
-- 
2.43.0



[PATCH v7 13/16] x86/ftrace: enable dynamic ftrace without CONFIG_MODULES

2024-04-29 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

Dynamic ftrace must allocate memory for code and this was impossible
without CONFIG_MODULES.

With execmem separated from the modules code, execmem_text_alloc() is
available regardless of CONFIG_MODULES.

Remove dependency of dynamic ftrace on CONFIG_MODULES and make
CONFIG_DYNAMIC_FTRACE select CONFIG_EXECMEM in Kconfig.

Signed-off-by: Mike Rapoport (IBM) 
---
 arch/x86/Kconfig |  1 +
 arch/x86/kernel/ftrace.c | 10 --
 2 files changed, 1 insertion(+), 10 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 4474bf32d0a4..f2917ccf4fb4 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -34,6 +34,7 @@ config X86_64
select SWIOTLB
select ARCH_HAS_ELFCORE_COMPAT
select ZONE_DMA32
+   select EXECMEM if DYNAMIC_FTRACE
 
 config FORCE_DYNAMIC_FTRACE
def_bool y
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index c8ddb7abda7c..8da0e66ca22d 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -261,8 +261,6 @@ void arch_ftrace_update_code(int command)
 /* Currently only x86_64 supports dynamic trampolines */
 #ifdef CONFIG_X86_64
 
-#ifdef CONFIG_MODULES
-/* Module allocation simplifies allocating memory for code */
 static inline void *alloc_tramp(unsigned long size)
 {
return execmem_alloc(EXECMEM_FTRACE, size);
@@ -271,14 +269,6 @@ static inline void tramp_free(void *tramp)
 {
execmem_free(tramp);
 }
-#else
-/* Trampolines can only be created if modules are supported */
-static inline void *alloc_tramp(unsigned long size)
-{
-   return NULL;
-}
-static inline void tramp_free(void *tramp) { }
-#endif
 
 /* Defined as markers to the end of the ftrace default trampolines */
 extern void ftrace_regs_caller_end(void);
-- 
2.43.0



[PATCH v7 12/16] arch: make execmem setup available regardless of CONFIG_MODULES

2024-04-29 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

execmem does not depend on modules; on the contrary, modules use
execmem.

To make execmem available when CONFIG_MODULES=n, for instance for
kprobes, split execmem_params initialization out from
arch/*/kernel/module.c and compile it when CONFIG_EXECMEM=y.

Signed-off-by: Mike Rapoport (IBM) 
Reviewed-by: Philippe Mathieu-Daudé 
---
 arch/arm/kernel/module.c   |  43 --
 arch/arm/mm/init.c |  45 +++
 arch/arm64/kernel/module.c | 140 -
 arch/arm64/mm/init.c   | 140 +
 arch/loongarch/kernel/module.c |  19 -
 arch/loongarch/mm/init.c   |  21 +
 arch/mips/kernel/module.c  |  22 --
 arch/mips/mm/init.c|  23 ++
 arch/nios2/kernel/module.c |  20 -
 arch/nios2/mm/init.c   |  21 +
 arch/parisc/kernel/module.c|  20 -
 arch/parisc/mm/init.c  |  23 +-
 arch/powerpc/kernel/module.c   |  63 ---
 arch/powerpc/mm/mem.c  |  64 +++
 arch/riscv/kernel/module.c |  34 
 arch/riscv/mm/init.c   |  35 +
 arch/s390/kernel/module.c  |  27 ---
 arch/s390/mm/init.c|  30 +++
 arch/sparc/kernel/module.c |  19 -
 arch/sparc/mm/Makefile |   2 +
 arch/sparc/mm/execmem.c|  21 +
 arch/x86/kernel/module.c   |  27 ---
 arch/x86/mm/init.c |  29 +++
 23 files changed, 453 insertions(+), 435 deletions(-)
 create mode 100644 arch/sparc/mm/execmem.c

diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c
index a98fdf6ff26c..677f218f7e84 100644
--- a/arch/arm/kernel/module.c
+++ b/arch/arm/kernel/module.c
@@ -12,57 +12,14 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
-#include 
-#include 
 
 #include 
 #include 
 #include 
 #include 
 
-#ifdef CONFIG_XIP_KERNEL
-/*
- * The XIP kernel text is mapped in the module area for modules and
- * some other stuff to work without any indirect relocations.
- * MODULES_VADDR is redefined here and not in asm/memory.h to avoid
- * recompiling the whole kernel when CONFIG_XIP_KERNEL is turned on/off.
- */
-#undef MODULES_VADDR
-#define MODULES_VADDR  (((unsigned long)_exiprom + ~PMD_MASK) & PMD_MASK)
-#endif
-
-#ifdef CONFIG_MMU
-static struct execmem_info execmem_info __ro_after_init;
-
-struct execmem_info __init *execmem_arch_setup(void)
-{
-   unsigned long fallback_start = 0, fallback_end = 0;
-
-   if (IS_ENABLED(CONFIG_ARM_MODULE_PLTS)) {
-   fallback_start = VMALLOC_START;
-   fallback_end = VMALLOC_END;
-   }
-
-   execmem_info = (struct execmem_info){
-   .ranges = {
-   [EXECMEM_DEFAULT] = {
-   .start  = MODULES_VADDR,
-   .end= MODULES_END,
-   .pgprot = PAGE_KERNEL_EXEC,
-   .alignment = 1,
-   .fallback_start = fallback_start,
-   .fallback_end   = fallback_end,
-   },
-   },
-   };
-
-   return _info;
-}
-#endif
-
 bool module_init_section(const char *name)
 {
return strstarts(name, ".init") ||
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index e8c6f4be0ce1..5345d218899a 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -22,6 +22,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -486,3 +487,47 @@ void free_initrd_mem(unsigned long start, unsigned long 
end)
free_reserved_area((void *)start, (void *)end, -1, "initrd");
 }
 #endif
+
+#ifdef CONFIG_EXECMEM
+
+#ifdef CONFIG_XIP_KERNEL
+/*
+ * The XIP kernel text is mapped in the module area for modules and
+ * some other stuff to work without any indirect relocations.
+ * MODULES_VADDR is redefined here and not in asm/memory.h to avoid
+ * recompiling the whole kernel when CONFIG_XIP_KERNEL is turned on/off.
+ */
+#undef MODULES_VADDR
+#define MODULES_VADDR  (((unsigned long)_exiprom + ~PMD_MASK) & PMD_MASK)
+#endif
+
+#ifdef CONFIG_MMU
+static struct execmem_info execmem_info __ro_after_init;
+
+struct execmem_info __init *execmem_arch_setup(void)
+{
+   unsigned long fallback_start = 0, fallback_end = 0;
+
+   if (IS_ENABLED(CONFIG_ARM_MODULE_PLTS)) {
+   fallback_start = VMALLOC_START;
+   fallback_end = VMALLOC_END;
+   }
+
+   execmem_info = (struct execmem_info){
+   .ranges = {
+   [EXECMEM_DEFAULT] = {
+   .start  = MODULES_VADDR,
+   .end= MODULES_END,
+   .pgprot = PAGE_KERNEL_EXEC,
+   .alignment = 1,
+   .fallback_start = fallback_start,
+   

[PATCH v7 11/16] powerpc: extend execmem_params for kprobes allocations

2024-04-29 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

powerpc overrides kprobes::alloc_insn_page() to remove writable
permissions when STRICT_MODULE_RWX is on.

Add definition of EXECMEM_KPROBES to execmem_params to allow using the
generic kprobes::alloc_insn_page() with the desired permissions.

As powerpc uses breakpoint instructions to inject kprobes, it does not
need to constrain kprobe allocations to the modules area and can use the
entire vmalloc address space.

Signed-off-by: Mike Rapoport (IBM) 
---
 arch/powerpc/kernel/kprobes.c | 20 
 arch/powerpc/kernel/module.c  |  7 +++
 2 files changed, 7 insertions(+), 20 deletions(-)

diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index 9fcd01bb2ce6..14c5ddec3056 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -126,26 +126,6 @@ kprobe_opcode_t *arch_adjust_kprobe_addr(unsigned long 
addr, unsigned long offse
return (kprobe_opcode_t *)(addr + offset);
 }
 
-void *alloc_insn_page(void)
-{
-   void *page;
-
-   page = execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE);
-   if (!page)
-   return NULL;
-
-   if (strict_module_rwx_enabled()) {
-   int err = set_memory_rox((unsigned long)page, 1);
-
-   if (err)
-   goto error;
-   }
-   return page;
-error:
-   execmem_free(page);
-   return NULL;
-}
-
 int arch_prepare_kprobe(struct kprobe *p)
 {
int ret = 0;
diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c
index ac80559015a3..2a23cf7e141b 100644
--- a/arch/powerpc/kernel/module.c
+++ b/arch/powerpc/kernel/module.c
@@ -94,6 +94,7 @@ static struct execmem_info execmem_info __ro_after_init;
 
 struct execmem_info __init *execmem_arch_setup(void)
 {
+   pgprot_t kprobes_prot = strict_module_rwx_enabled() ? PAGE_KERNEL_ROX : 
PAGE_KERNEL_EXEC;
pgprot_t prot = strict_module_rwx_enabled() ? PAGE_KERNEL : 
PAGE_KERNEL_EXEC;
unsigned long fallback_start = 0, fallback_end = 0;
unsigned long start, end;
@@ -132,6 +133,12 @@ struct execmem_info __init *execmem_arch_setup(void)
.fallback_start = fallback_start,
.fallback_end   = fallback_end,
},
+   [EXECMEM_KPROBES] = {
+   .start  = VMALLOC_START,
+   .end= VMALLOC_END,
+   .pgprot = kprobes_prot,
+   .alignment = 1,
+   },
[EXECMEM_MODULE_DATA] = {
.start  = VMALLOC_START,
.end= VMALLOC_END,
-- 
2.43.0



[PATCH v7 10/16] arm64: extend execmem_info for generated code allocations

2024-04-29 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

The memory allocations for kprobes and BPF on arm64 can be placed
anywhere in vmalloc address space and currently this is implemented with
overrides of alloc_insn_page() and bpf_jit_alloc_exec() in arm64.

Define EXECMEM_KPROBES and EXECMEM_BPF ranges in arm64::execmem_info and
drop overrides of alloc_insn_page() and bpf_jit_alloc_exec().

Signed-off-by: Mike Rapoport (IBM) 
Acked-by: Will Deacon 
---
 arch/arm64/kernel/module.c | 12 
 arch/arm64/kernel/probes/kprobes.c |  7 ---
 arch/arm64/net/bpf_jit_comp.c  | 11 ---
 3 files changed, 12 insertions(+), 18 deletions(-)

diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index b7a7a23f9f8f..a52240ea084b 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -146,6 +146,18 @@ struct execmem_info __init *execmem_arch_setup(void)
.fallback_start = fallback_start,
.fallback_end   = fallback_end,
},
+   [EXECMEM_KPROBES] = {
+   .start  = VMALLOC_START,
+   .end= VMALLOC_END,
+   .pgprot = PAGE_KERNEL_ROX,
+   .alignment = 1,
+   },
+   [EXECMEM_BPF] = {
+   .start  = VMALLOC_START,
+   .end= VMALLOC_END,
+   .pgprot = PAGE_KERNEL,
+   .alignment = 1,
+   },
},
};
 
diff --git a/arch/arm64/kernel/probes/kprobes.c 
b/arch/arm64/kernel/probes/kprobes.c
index 327855a11df2..4268678d0e86 100644
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -129,13 +129,6 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
return 0;
 }
 
-void *alloc_insn_page(void)
-{
-   return __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, VMALLOC_END,
-   GFP_KERNEL, PAGE_KERNEL_ROX, VM_FLUSH_RESET_PERMS,
-   NUMA_NO_NODE, __builtin_return_address(0));
-}
-
 /* arm kprobe: install breakpoint in text */
 void __kprobes arch_arm_kprobe(struct kprobe *p)
 {
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 122021f9bdfc..456f5af239fc 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -1793,17 +1793,6 @@ u64 bpf_jit_alloc_exec_limit(void)
return VMALLOC_END - VMALLOC_START;
 }
 
-void *bpf_jit_alloc_exec(unsigned long size)
-{
-   /* Memory is intended to be executable, reset the pointer tag. */
-   return kasan_reset_tag(vmalloc(size));
-}
-
-void bpf_jit_free_exec(void *addr)
-{
-   return vfree(addr);
-}
-
 /* Indicate the JIT backend supports mixing bpf2bpf and tailcalls. */
 bool bpf_jit_supports_subprog_tailcalls(void)
 {
-- 
2.43.0



[PATCH v7 09/16] riscv: extend execmem_params for generated code allocations

2024-04-29 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

The memory allocations for kprobes and BPF on RISC-V are not placed in
the modules area and these custom allocations are implemented with
overrides of alloc_insn_page() and  bpf_jit_alloc_exec().

Define MODULES_VADDR and MODULES_END as VMALLOC_START and VMALLOC_END for
32 bit and slightly reorder execmem_params initialization to support both
32 and 64 bit variants, define EXECMEM_KPROBES and EXECMEM_BPF ranges in
riscv::execmem_params and drop overrides of alloc_insn_page() and
bpf_jit_alloc_exec().

Signed-off-by: Mike Rapoport (IBM) 
Reviewed-by: Alexandre Ghiti 
---
 arch/riscv/include/asm/pgtable.h   |  3 +++
 arch/riscv/kernel/module.c | 14 +-
 arch/riscv/kernel/probes/kprobes.c | 10 --
 arch/riscv/net/bpf_jit_core.c  | 13 -
 4 files changed, 16 insertions(+), 24 deletions(-)

diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 9f8ea0e33eb1..5f21814e438e 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -55,6 +55,9 @@
 #define MODULES_LOWEST_VADDR   (KERNEL_LINK_ADDR - SZ_2G)
 #define MODULES_VADDR  (PFN_ALIGN((unsigned long)&_end) - SZ_2G)
 #define MODULES_END(PFN_ALIGN((unsigned long)&_start))
+#else
+#define MODULES_VADDR  VMALLOC_START
+#define MODULES_ENDVMALLOC_END
 #endif
 
 /*
diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c
index 182904127ba0..0e6415f00fca 100644
--- a/arch/riscv/kernel/module.c
+++ b/arch/riscv/kernel/module.c
@@ -906,7 +906,7 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char 
*strtab,
return 0;
 }
 
-#if defined(CONFIG_MMU) && defined(CONFIG_64BIT)
+#ifdef CONFIG_MMU
 static struct execmem_info execmem_info __ro_after_init;
 
 struct execmem_info __init *execmem_arch_setup(void)
@@ -919,6 +919,18 @@ struct execmem_info __init *execmem_arch_setup(void)
.pgprot = PAGE_KERNEL,
.alignment = 1,
},
+   [EXECMEM_KPROBES] = {
+   .start  = VMALLOC_START,
+   .end= VMALLOC_END,
+   .pgprot = PAGE_KERNEL_READ_EXEC,
+   .alignment = 1,
+   },
+   [EXECMEM_BPF] = {
+   .start  = BPF_JIT_REGION_START,
+   .end= BPF_JIT_REGION_END,
+   .pgprot = PAGE_KERNEL,
+   .alignment = PAGE_SIZE,
+   },
},
};
 
diff --git a/arch/riscv/kernel/probes/kprobes.c 
b/arch/riscv/kernel/probes/kprobes.c
index 2f08c14a933d..e64f2f3064eb 100644
--- a/arch/riscv/kernel/probes/kprobes.c
+++ b/arch/riscv/kernel/probes/kprobes.c
@@ -104,16 +104,6 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
return 0;
 }
 
-#ifdef CONFIG_MMU
-void *alloc_insn_page(void)
-{
-   return  __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, VMALLOC_END,
-GFP_KERNEL, PAGE_KERNEL_READ_EXEC,
-VM_FLUSH_RESET_PERMS, NUMA_NO_NODE,
-__builtin_return_address(0));
-}
-#endif
-
 /* install breakpoint in text */
 void __kprobes arch_arm_kprobe(struct kprobe *p)
 {
diff --git a/arch/riscv/net/bpf_jit_core.c b/arch/riscv/net/bpf_jit_core.c
index 6b3acac30c06..e238fdbd5dbc 100644
--- a/arch/riscv/net/bpf_jit_core.c
+++ b/arch/riscv/net/bpf_jit_core.c
@@ -219,19 +219,6 @@ u64 bpf_jit_alloc_exec_limit(void)
return BPF_JIT_REGION_SIZE;
 }
 
-void *bpf_jit_alloc_exec(unsigned long size)
-{
-   return __vmalloc_node_range(size, PAGE_SIZE, BPF_JIT_REGION_START,
-   BPF_JIT_REGION_END, GFP_KERNEL,
-   PAGE_KERNEL, 0, NUMA_NO_NODE,
-   __builtin_return_address(0));
-}
-
-void bpf_jit_free_exec(void *addr)
-{
-   return vfree(addr);
-}
-
 void *bpf_arch_text_copy(void *dst, void *src, size_t len)
 {
int ret;
-- 
2.43.0



[PATCH v7 08/16] mm/execmem, arch: convert remaining overrides of module_alloc to execmem

2024-04-29 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

Extend execmem parameters to accommodate more complex overrides of
module_alloc() by architectures.

This includes specification of a fallback range required by arm, arm64
and powerpc, EXECMEM_MODULE_DATA type required by powerpc, support for
allocation of KASAN shadow required by s390 and x86 and support for
late initialization of execmem required by arm64.

The core implementation of execmem_alloc() takes care of suppressing
warnings when the initial allocation fails but there is a fallback range
defined.

Signed-off-by: Mike Rapoport (IBM) 
Acked-by: Will Deacon 
Acked-by: Song Liu 
---
 arch/Kconfig |  8 
 arch/arm/kernel/module.c | 41 
 arch/arm64/Kconfig   |  1 +
 arch/arm64/kernel/module.c   | 55 ++
 arch/powerpc/kernel/module.c | 60 +++--
 arch/s390/kernel/module.c| 54 +++---
 arch/x86/kernel/module.c | 70 +++--
 include/linux/execmem.h  | 30 ++-
 include/linux/moduleloader.h | 12 --
 kernel/module/main.c | 26 +++--
 mm/execmem.c | 75 ++--
 11 files changed, 247 insertions(+), 185 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index 65afb1de48b3..4fd0daa54e6c 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -960,6 +960,14 @@ config ARCH_WANTS_MODULES_DATA_IN_VMALLOC
  For architectures like powerpc/32 which have constraints on module
  allocation and need to allocate module data outside of module area.
 
+config ARCH_WANTS_EXECMEM_LATE
+   bool
+   help
+ For architectures that do not allocate executable memory early on
+ boot, but rather require its initialization late when there is
+ enough entropy for module space randomization, for instance
+ arm64.
+
 config HAVE_IRQ_EXIT_ON_IRQ_STACK
bool
help
diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c
index e74d84f58b77..a98fdf6ff26c 100644
--- a/arch/arm/kernel/module.c
+++ b/arch/arm/kernel/module.c
@@ -16,6 +16,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -34,23 +35,31 @@
 #endif
 
 #ifdef CONFIG_MMU
-void *module_alloc(unsigned long size)
+static struct execmem_info execmem_info __ro_after_init;
+
+struct execmem_info __init *execmem_arch_setup(void)
 {
-   gfp_t gfp_mask = GFP_KERNEL;
-   void *p;
-
-   /* Silence the initial allocation */
-   if (IS_ENABLED(CONFIG_ARM_MODULE_PLTS))
-   gfp_mask |= __GFP_NOWARN;
-
-   p = __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
-   gfp_mask, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
-   __builtin_return_address(0));
-   if (!IS_ENABLED(CONFIG_ARM_MODULE_PLTS) || p)
-   return p;
-   return __vmalloc_node_range(size, 1,  VMALLOC_START, VMALLOC_END,
-   GFP_KERNEL, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
-   __builtin_return_address(0));
+   unsigned long fallback_start = 0, fallback_end = 0;
+
+   if (IS_ENABLED(CONFIG_ARM_MODULE_PLTS)) {
+   fallback_start = VMALLOC_START;
+   fallback_end = VMALLOC_END;
+   }
+
+   execmem_info = (struct execmem_info){
+   .ranges = {
+   [EXECMEM_DEFAULT] = {
+   .start  = MODULES_VADDR,
+   .end= MODULES_END,
+   .pgprot = PAGE_KERNEL_EXEC,
+   .alignment = 1,
+   .fallback_start = fallback_start,
+   .fallback_end   = fallback_end,
+   },
+   },
+   };
+
+   return _info;
 }
 #endif
 
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 7b11c98b3e84..74b34a78b7ac 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -105,6 +105,7 @@ config ARM64
select ARCH_WANT_FRAME_POINTERS
select ARCH_WANT_HUGE_PMD_SHARE if ARM64_4K_PAGES || (ARM64_16K_PAGES 
&& !ARM64_VA_BITS_36)
select ARCH_WANT_LD_ORPHAN_WARN
+   select ARCH_WANTS_EXECMEM_LATE if EXECMEM
select ARCH_WANTS_NO_INSTR
select ARCH_WANTS_THP_SWAP if ARM64_4K_PAGES
select ARCH_HAS_UBSAN
diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index e92da4da1b2a..b7a7a23f9f8f 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -20,6 +20,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -108,41 +109,47 @@ static int __init module_init_limits(void)
 
return 0;
 }
-subsys_initcall(module_init_limits);
 
-void *module_alloc(unsigned long size)
+static struct execmem_info execmem_info __ro_after_init;
+
+struct execmem_info __init *execm

[PATCH v7 07/16] mm/execmem, arch: convert simple overrides of module_alloc to execmem

2024-04-29 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

Several architectures override module_alloc() only to define address
range for code allocations different than VMALLOC address space.

Provide a generic implementation in execmem that uses the parameters for
address space ranges, required alignment and page protections provided
by architectures.

The architectures must fill execmem_info structure and implement
execmem_arch_setup() that returns a pointer to that structure. This way the
execmem initialization won't be called from every architecture, but rather
from a central place, namely a core_initcall() in execmem.

The execmem provides execmem_alloc() API that wraps __vmalloc_node_range()
with the parameters defined by the architectures.  If an architecture does
not implement execmem_arch_setup(), execmem_alloc() will fall back to
module_alloc().

Signed-off-by: Mike Rapoport (IBM) 
Acked-by: Song Liu 
---
 arch/loongarch/kernel/module.c | 19 --
 arch/mips/kernel/module.c  | 20 --
 arch/nios2/kernel/module.c | 21 ---
 arch/parisc/kernel/module.c| 24 
 arch/riscv/kernel/module.c | 24 
 arch/sparc/kernel/module.c | 20 --
 include/linux/execmem.h| 47 
 mm/execmem.c   | 67 --
 mm/mm_init.c   |  2 +
 9 files changed, 210 insertions(+), 34 deletions(-)

diff --git a/arch/loongarch/kernel/module.c b/arch/loongarch/kernel/module.c
index c7d0338d12c1..ca6dd7ea1610 100644
--- a/arch/loongarch/kernel/module.c
+++ b/arch/loongarch/kernel/module.c
@@ -18,6 +18,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -490,10 +491,22 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char 
*strtab,
return 0;
 }
 
-void *module_alloc(unsigned long size)
+static struct execmem_info execmem_info __ro_after_init;
+
+struct execmem_info __init *execmem_arch_setup(void)
 {
-   return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
-   GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE, 
__builtin_return_address(0));
+   execmem_info = (struct execmem_info){
+   .ranges = {
+   [EXECMEM_DEFAULT] = {
+   .start  = MODULES_VADDR,
+   .end= MODULES_END,
+   .pgprot = PAGE_KERNEL,
+   .alignment = 1,
+   },
+   },
+   };
+
+   return _info;
 }
 
 static void module_init_ftrace_plt(const Elf_Ehdr *hdr,
diff --git a/arch/mips/kernel/module.c b/arch/mips/kernel/module.c
index 9a6c96014904..59225a3cf918 100644
--- a/arch/mips/kernel/module.c
+++ b/arch/mips/kernel/module.c
@@ -20,6 +20,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 struct mips_hi16 {
@@ -32,11 +33,22 @@ static LIST_HEAD(dbe_list);
 static DEFINE_SPINLOCK(dbe_lock);
 
 #ifdef MODULES_VADDR
-void *module_alloc(unsigned long size)
+static struct execmem_info execmem_info __ro_after_init;
+
+struct execmem_info __init *execmem_arch_setup(void)
 {
-   return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
-   GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE,
-   __builtin_return_address(0));
+   execmem_info = (struct execmem_info){
+   .ranges = {
+   [EXECMEM_DEFAULT] = {
+   .start  = MODULES_VADDR,
+   .end= MODULES_END,
+   .pgprot = PAGE_KERNEL,
+   .alignment = 1,
+   },
+   },
+   };
+
+   return _info;
 }
 #endif
 
diff --git a/arch/nios2/kernel/module.c b/arch/nios2/kernel/module.c
index 9c97b7513853..0d1ee86631fc 100644
--- a/arch/nios2/kernel/module.c
+++ b/arch/nios2/kernel/module.c
@@ -18,15 +18,26 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 
-void *module_alloc(unsigned long size)
+static struct execmem_info execmem_info __ro_after_init;
+
+struct execmem_info __init *execmem_arch_setup(void)
 {
-   return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
-   GFP_KERNEL, PAGE_KERNEL_EXEC,
-   VM_FLUSH_RESET_PERMS, NUMA_NO_NODE,
-   __builtin_return_address(0));
+   execmem_info = (struct execmem_info){
+   .ranges = {
+   [EXECMEM_DEFAULT] = {
+   .start  = MODULES_VADDR,
+   .end= MODULES_END,
+   .pgprot = PAGE_KERNEL_EXEC,
+   .alignment = 1,
+   },
+   },
+   };
+
+   return _info;
 }
 
 int apply_relocate_add(Elf32_Shdr *sechdrs, const char *strtab,
diff --git a/arch/par

[PATCH v7 06/16] mm: introduce execmem_alloc() and execmem_free()

2024-04-29 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

module_alloc() is used everywhere as a means to allocate memory for code.

Besides being semantically wrong, this unnecessarily ties all subsystems
that need to allocate code, such as ftrace, kprobes and BPF to modules and
puts the burden of code allocation to the modules code.

Several architectures override module_alloc() because of various
constraints where the executable memory can be located and this causes
additional obstacles for improvements of code allocation.

Start splitting code allocation from modules by introducing execmem_alloc()
and execmem_free() APIs.

Initially, execmem_alloc() is a wrapper for module_alloc() and
execmem_free() is a replacement of module_memfree() to allow updating all
call sites to use the new APIs.

Since architectures define different restrictions on placement,
permissions, alignment and other parameters for memory that can be used by
different subsystems that allocate executable memory, execmem_alloc() takes
a type argument, that will be used to identify the calling subsystem and to
allow architectures to define parameters for ranges suitable for that
subsystem.

No functional changes.

Signed-off-by: Mike Rapoport (IBM) 
Acked-by: Masami Hiramatsu (Google) 
Acked-by: Song Liu 
---
 arch/powerpc/kernel/kprobes.c|  6 ++--
 arch/s390/kernel/ftrace.c|  4 +--
 arch/s390/kernel/kprobes.c   |  4 +--
 arch/s390/kernel/module.c|  5 +--
 arch/sparc/net/bpf_jit_comp_32.c |  8 ++---
 arch/x86/kernel/ftrace.c |  6 ++--
 arch/x86/kernel/kprobes/core.c   |  4 +--
 include/linux/execmem.h  | 57 
 include/linux/moduleloader.h |  3 --
 kernel/bpf/core.c|  6 ++--
 kernel/kprobes.c |  8 ++---
 kernel/module/Kconfig|  1 +
 kernel/module/main.c | 25 +-
 mm/Kconfig   |  3 ++
 mm/Makefile  |  1 +
 mm/execmem.c | 32 ++
 16 files changed, 128 insertions(+), 45 deletions(-)
 create mode 100644 include/linux/execmem.h
 create mode 100644 mm/execmem.c

diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index bbca90a5e2ec..9fcd01bb2ce6 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -19,8 +19,8 @@
 #include 
 #include 
 #include 
-#include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -130,7 +130,7 @@ void *alloc_insn_page(void)
 {
void *page;
 
-   page = module_alloc(PAGE_SIZE);
+   page = execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE);
if (!page)
return NULL;
 
@@ -142,7 +142,7 @@ void *alloc_insn_page(void)
}
return page;
 error:
-   module_memfree(page);
+   execmem_free(page);
return NULL;
 }
 
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index c46381ea04ec..798249ef5646 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -7,13 +7,13 @@
  *   Author(s): Martin Schwidefsky 
  */
 
-#include 
 #include 
 #include 
 #include 
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -220,7 +220,7 @@ static int __init ftrace_plt_init(void)
 {
const char *start, *end;
 
-   ftrace_plt = module_alloc(PAGE_SIZE);
+   ftrace_plt = execmem_alloc(EXECMEM_FTRACE, PAGE_SIZE);
if (!ftrace_plt)
panic("cannot allocate ftrace plt\n");
 
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index f0cf20d4b3c5..3c1b1be744de 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -9,7 +9,6 @@
 
 #define pr_fmt(fmt) "kprobes: " fmt
 
-#include 
 #include 
 #include 
 #include 
@@ -21,6 +20,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -38,7 +38,7 @@ void *alloc_insn_page(void)
 {
void *page;
 
-   page = module_alloc(PAGE_SIZE);
+   page = execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE);
if (!page)
return NULL;
set_memory_rox((unsigned long)page, 1);
diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c
index 42215f9404af..ac97a905e8cd 100644
--- a/arch/s390/kernel/module.c
+++ b/arch/s390/kernel/module.c
@@ -21,6 +21,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -76,7 +77,7 @@ void *module_alloc(unsigned long size)
 #ifdef CONFIG_FUNCTION_TRACER
 void module_arch_cleanup(struct module *mod)
 {
-   module_memfree(mod->arch.trampolines_start);
+   execmem_free(mod->arch.trampolines_start);
 }
 #endif
 
@@ -510,7 +511,7 @@ static int module_alloc_ftrace_hotpatch_trampolines(struct 
module *me,
 
size = FTRACE_HOTPATCH_TRAMPOLINES_SIZE(s->sh_size);
numpages = DIV_ROUND_UP(size, PAGE_SIZE);
-   start = module_alloc(numpages * PAGE_SIZE);
+   start = execmem_alloc(EXECMEM_FTRACE, nu

[PATCH v7 05/16] module: make module_memory_{alloc,free} more self-contained

2024-04-29 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

Move the logic related to the memory allocation and freeing into
module_memory_alloc() and module_memory_free().

Signed-off-by: Mike Rapoport (IBM) 
---
 kernel/module/main.c | 64 +++-
 1 file changed, 39 insertions(+), 25 deletions(-)

diff --git a/kernel/module/main.c b/kernel/module/main.c
index e1e8a7a9d6c1..5b82b069e0d3 100644
--- a/kernel/module/main.c
+++ b/kernel/module/main.c
@@ -1203,15 +1203,44 @@ static bool mod_mem_use_vmalloc(enum mod_mem_type type)
mod_mem_type_is_core_data(type);
 }
 
-static void *module_memory_alloc(unsigned int size, enum mod_mem_type type)
+static int module_memory_alloc(struct module *mod, enum mod_mem_type type)
 {
+   unsigned int size = PAGE_ALIGN(mod->mem[type].size);
+   void *ptr;
+
+   mod->mem[type].size = size;
+
if (mod_mem_use_vmalloc(type))
-   return vzalloc(size);
-   return module_alloc(size);
+   ptr = vmalloc(size);
+   else
+   ptr = module_alloc(size);
+
+   if (!ptr)
+   return -ENOMEM;
+
+   /*
+* The pointer to these blocks of memory are stored on the module
+* structure and we keep that around so long as the module is
+* around. We only free that memory when we unload the module.
+* Just mark them as not being a leak then. The .init* ELF
+* sections *do* get freed after boot so we *could* treat them
+* slightly differently with kmemleak_ignore() and only grey
+* them out as they work as typical memory allocations which
+* *do* eventually get freed, but let's just keep things simple
+* and avoid *any* false positives.
+*/
+   kmemleak_not_leak(ptr);
+
+   memset(ptr, 0, size);
+   mod->mem[type].base = ptr;
+
+   return 0;
 }
 
-static void module_memory_free(void *ptr, enum mod_mem_type type)
+static void module_memory_free(struct module *mod, enum mod_mem_type type)
 {
+   void *ptr = mod->mem[type].base;
+
if (mod_mem_use_vmalloc(type))
vfree(ptr);
else
@@ -1229,12 +1258,12 @@ static void free_mod_mem(struct module *mod)
/* Free lock-classes; relies on the preceding sync_rcu(). */
lockdep_free_key_range(mod_mem->base, mod_mem->size);
if (mod_mem->size)
-   module_memory_free(mod_mem->base, type);
+   module_memory_free(mod, type);
}
 
/* MOD_DATA hosts mod, so free it at last */
lockdep_free_key_range(mod->mem[MOD_DATA].base, 
mod->mem[MOD_DATA].size);
-   module_memory_free(mod->mem[MOD_DATA].base, MOD_DATA);
+   module_memory_free(mod, MOD_DATA);
 }
 
 /* Free a module, remove from lists, etc. */
@@ -2225,7 +2254,6 @@ static int find_module_sections(struct module *mod, 
struct load_info *info)
 static int move_module(struct module *mod, struct load_info *info)
 {
int i;
-   void *ptr;
enum mod_mem_type t = 0;
int ret = -ENOMEM;
 
@@ -2234,26 +2262,12 @@ static int move_module(struct module *mod, struct 
load_info *info)
mod->mem[type].base = NULL;
continue;
}
-   mod->mem[type].size = PAGE_ALIGN(mod->mem[type].size);
-   ptr = module_memory_alloc(mod->mem[type].size, type);
-   /*
- * The pointer to these blocks of memory are stored on the 
module
- * structure and we keep that around so long as the module is
- * around. We only free that memory when we unload the module.
- * Just mark them as not being a leak then. The .init* ELF
- * sections *do* get freed after boot so we *could* treat them
- * slightly differently with kmemleak_ignore() and only grey
- * them out as they work as typical memory allocations which
- * *do* eventually get freed, but let's just keep things simple
- * and avoid *any* false positives.
-*/
-   kmemleak_not_leak(ptr);
-   if (!ptr) {
+
+   ret = module_memory_alloc(mod, type);
+   if (ret) {
t = type;
goto out_enomem;
}
-   memset(ptr, 0, mod->mem[type].size);
-   mod->mem[type].base = ptr;
}
 
/* Transfer each section which specifies SHF_ALLOC */
@@ -2296,7 +2310,7 @@ static int move_module(struct module *mod, struct 
load_info *info)
return 0;
 out_enomem:
for (t--; t >= 0; t--)
-   module_memory_free(mod->mem[t].base, t);
+   module_memory_free(mod, t);
return ret;
 }
 
-- 
2.43.0



[PATCH v7 04/16] sparc: simplify module_alloc()

2024-04-29 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

Define MODULES_VADDR and MODULES_END as VMALLOC_START and VMALLOC_END
for 32-bit and reduce module_alloc() to

__vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, ...)

as with the new defines the allocations become identical for both 32
and 64 bits.

While on it, drop unused include of 

Suggested-by: Sam Ravnborg 
Signed-off-by: Mike Rapoport (IBM) 
Reviewed-by: Sam Ravnborg 
---
 arch/sparc/include/asm/pgtable_32.h |  2 ++
 arch/sparc/kernel/module.c  | 25 +
 2 files changed, 3 insertions(+), 24 deletions(-)

diff --git a/arch/sparc/include/asm/pgtable_32.h 
b/arch/sparc/include/asm/pgtable_32.h
index 9e85d57ac3f2..62bcafe38b1f 100644
--- a/arch/sparc/include/asm/pgtable_32.h
+++ b/arch/sparc/include/asm/pgtable_32.h
@@ -432,6 +432,8 @@ static inline int io_remap_pfn_range(struct vm_area_struct 
*vma,
 
 #define VMALLOC_START   _AC(0xfe60,UL)
 #define VMALLOC_END _AC(0xffc0,UL)
+#define MODULES_VADDR   VMALLOC_START
+#define MODULES_END VMALLOC_END
 
 /* We provide our own get_unmapped_area to cope with VA holes for userland */
 #define HAVE_ARCH_UNMAPPED_AREA
diff --git a/arch/sparc/kernel/module.c b/arch/sparc/kernel/module.c
index 66c45a2764bc..d37adb2a0b54 100644
--- a/arch/sparc/kernel/module.c
+++ b/arch/sparc/kernel/module.c
@@ -21,35 +21,12 @@
 
 #include "entry.h"
 
-#ifdef CONFIG_SPARC64
-
-#include 
-
-static void *module_map(unsigned long size)
+void *module_alloc(unsigned long size)
 {
-   if (PAGE_ALIGN(size) > MODULES_LEN)
-   return NULL;
return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE,
__builtin_return_address(0));
 }
-#else
-static void *module_map(unsigned long size)
-{
-   return vmalloc(size);
-}
-#endif /* CONFIG_SPARC64 */
-
-void *module_alloc(unsigned long size)
-{
-   void *ret;
-
-   ret = module_map(size);
-   if (ret)
-   memset(ret, 0, size);
-
-   return ret;
-}
 
 /* Make generic code ignore STT_REGISTER dummy undefined symbols.  */
 int module_frob_arch_sections(Elf_Ehdr *hdr,
-- 
2.43.0



[PATCH v7 03/16] nios2: define virtual address space for modules

2024-04-29 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

nios2 uses kmalloc() to implement module_alloc() because CALL26/PCREL26
cannot reach all of vmalloc address space.

Define module space as 32MiB below the kernel base and switch nios2 to
use vmalloc for module allocations.

Suggested-by: Thomas Gleixner 
Acked-by: Dinh Nguyen 
Acked-by: Song Liu 
Signed-off-by: Mike Rapoport (IBM) 
---
 arch/nios2/include/asm/pgtable.h |  5 -
 arch/nios2/kernel/module.c   | 19 ---
 2 files changed, 8 insertions(+), 16 deletions(-)

diff --git a/arch/nios2/include/asm/pgtable.h b/arch/nios2/include/asm/pgtable.h
index d052dfcbe8d3..eab87c6beacb 100644
--- a/arch/nios2/include/asm/pgtable.h
+++ b/arch/nios2/include/asm/pgtable.h
@@ -25,7 +25,10 @@
 #include 
 
 #define VMALLOC_START  CONFIG_NIOS2_KERNEL_MMU_REGION_BASE
-#define VMALLOC_END(CONFIG_NIOS2_KERNEL_REGION_BASE - 1)
+#define VMALLOC_END(CONFIG_NIOS2_KERNEL_REGION_BASE - SZ_32M - 1)
+
+#define MODULES_VADDR  (CONFIG_NIOS2_KERNEL_REGION_BASE - SZ_32M)
+#define MODULES_END(CONFIG_NIOS2_KERNEL_REGION_BASE - 1)
 
 struct mm_struct;
 
diff --git a/arch/nios2/kernel/module.c b/arch/nios2/kernel/module.c
index 76e0a42d6e36..9c97b7513853 100644
--- a/arch/nios2/kernel/module.c
+++ b/arch/nios2/kernel/module.c
@@ -21,23 +21,12 @@
 
 #include 
 
-/*
- * Modules should NOT be allocated with kmalloc for (obvious) reasons.
- * But we do it for now to avoid relocation issues. CALL26/PCREL26 cannot reach
- * from 0x8000 (vmalloc area) to 0xc (kernel) (kmalloc returns
- * addresses in 0xc000)
- */
 void *module_alloc(unsigned long size)
 {
-   if (size == 0)
-   return NULL;
-   return kmalloc(size, GFP_KERNEL);
-}
-
-/* Free memory returned from module_alloc */
-void module_memfree(void *module_region)
-{
-   kfree(module_region);
+   return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
+   GFP_KERNEL, PAGE_KERNEL_EXEC,
+   VM_FLUSH_RESET_PERMS, NUMA_NO_NODE,
+   __builtin_return_address(0));
 }
 
 int apply_relocate_add(Elf32_Shdr *sechdrs, const char *strtab,
-- 
2.43.0



[PATCH v7 02/16] mips: module: rename MODULE_START to MODULES_VADDR

2024-04-29 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

and MODULE_END to MODULES_END to match other architectures that define
custom address space for modules.

Signed-off-by: Mike Rapoport (IBM) 
---
 arch/mips/include/asm/pgtable-64.h | 4 ++--
 arch/mips/kernel/module.c  | 4 ++--
 arch/mips/mm/fault.c   | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/mips/include/asm/pgtable-64.h 
b/arch/mips/include/asm/pgtable-64.h
index 20ca48c1b606..c0109aff223b 100644
--- a/arch/mips/include/asm/pgtable-64.h
+++ b/arch/mips/include/asm/pgtable-64.h
@@ -147,8 +147,8 @@
 #if defined(CONFIG_MODULES) && defined(KBUILD_64BIT_SYM32) && \
VMALLOC_START != CKSSEG
 /* Load modules into 32bit-compatible segment. */
-#define MODULE_START   CKSSEG
-#define MODULE_END (FIXADDR_START-2*PAGE_SIZE)
+#define MODULES_VADDR  CKSSEG
+#define MODULES_END(FIXADDR_START-2*PAGE_SIZE)
 #endif
 
 #define pte_ERROR(e) \
diff --git a/arch/mips/kernel/module.c b/arch/mips/kernel/module.c
index 7b2fbaa9cac5..9a6c96014904 100644
--- a/arch/mips/kernel/module.c
+++ b/arch/mips/kernel/module.c
@@ -31,10 +31,10 @@ struct mips_hi16 {
 static LIST_HEAD(dbe_list);
 static DEFINE_SPINLOCK(dbe_lock);
 
-#ifdef MODULE_START
+#ifdef MODULES_VADDR
 void *module_alloc(unsigned long size)
 {
-   return __vmalloc_node_range(size, 1, MODULE_START, MODULE_END,
+   return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE,
__builtin_return_address(0));
 }
diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c
index aaa9a242ebba..37fedeaca2e9 100644
--- a/arch/mips/mm/fault.c
+++ b/arch/mips/mm/fault.c
@@ -83,8 +83,8 @@ static void __do_page_fault(struct pt_regs *regs, unsigned 
long write,
 
if (unlikely(address >= VMALLOC_START && address <= VMALLOC_END))
goto VMALLOC_FAULT_TARGET;
-#ifdef MODULE_START
-   if (unlikely(address >= MODULE_START && address < MODULE_END))
+#ifdef MODULES_VADDR
+   if (unlikely(address >= MODULES_VADDR && address < MODULES_END))
goto VMALLOC_FAULT_TARGET;
 #endif
 
-- 
2.43.0



[PATCH v7 01/16] arm64: module: remove unneeded call to kasan_alloc_module_shadow()

2024-04-29 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

Since commit f6f37d9320a1 ("arm64: select KASAN_VMALLOC for SW/HW_TAGS
modes") KASAN_VMALLOC is always enabled when KASAN is on. This means
that allocations in module_alloc() will be tracked by KASAN protection
for vmalloc() and that kasan_alloc_module_shadow() will be always an
empty inline and there is no point in calling it.

Drop meaningless call to kasan_alloc_module_shadow() from
module_alloc().

Signed-off-by: Mike Rapoport (IBM) 
---
 arch/arm64/kernel/module.c | 5 -
 1 file changed, 5 deletions(-)

diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index 47e0be610bb6..e92da4da1b2a 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -141,11 +141,6 @@ void *module_alloc(unsigned long size)
__func__);
}
 
-   if (p && (kasan_alloc_module_shadow(p, size, GFP_KERNEL) < 0)) {
-   vfree(p);
-   return NULL;
-   }
-
/* Memory is intended to be executable, reset the pointer tag. */
return kasan_reset_tag(p);
 }
-- 
2.43.0



[PATCH v7 00/16] mm: jit/text allocator

2024-04-29 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

Hi,

The patches are also available in git:
https://git.kernel.org/pub/scm/linux/kernel/git/rppt/linux.git/log/?h=execmem/v7

v7 changes:
* define MODULES_{VADDR,END} for riscv32 to fix the build and avoid
  #ifdefs in a function body
* add Acks, thanks everybody

v6: https://lore.kernel.org/all/20240426082854.7355-1-r...@kernel.org
* restore patch "arm64: extend execmem_info for generated code
  allocations" that disappeared in v5 rebase
* update execmem initialization so that by default it will be
  initialized early while late initialization will be an opt-in

v5: https://lore.kernel.org/all/20240422094436.3625171-1-r...@kernel.org
* rebase on v6.9-rc4 to avoid a conflict in kprobes
* add copyrights to mm/execmem.c (Luis)
* fix spelling (Ingo)
* define MODULES_VADDR for sparc (Sam)
* consistently initialize struct execmem_info (Peter)
* reduce #ifdefs in function bodies in kprobes (Masami) 

v4: https://lore.kernel.org/all/20240411160051.2093261-1-r...@kernel.org
* rebase on v6.9-rc2
* rename execmem_params to execmem_info and execmem_arch_params() to
  execmem_arch_setup()
* use single execmem_alloc() API instead of execmem_{text,data}_alloc() (Song)
* avoid extra copy of execmem parameters (Rick)
* run execmem_init() as core_initcall() except for the architectures that
  may allocate text really early (currently only x86) (Will)
* add acks for some of arm64 and riscv changes, thanks Will and Alexandre
* new commits:
  - drop call to kasan_alloc_module_shadow() on arm64 because it's not
needed anymore
  - rename MODULE_START to MODULES_VADDR on MIPS
  - use CONFIG_EXECMEM instead of CONFIG_MODULES on powerpc as per Christophe:
https://lore.kernel.org/all/79062fa3-3402-47b3-8920-9231ad05e...@csgroup.eu/

v3: https://lore.kernel.org/all/20230918072955.2507221-1-r...@kernel.org
* add type parameter to execmem allocation APIs
* remove BPF dependency on modules

v2: https://lore.kernel.org/all/20230616085038.4121892-1-r...@kernel.org
* Separate "module" and "others" allocations with execmem_text_alloc()
and jit_text_alloc()
* Drop ROX entailment on x86
* Add ack for nios2 changes, thanks Dinh Nguyen

v1: https://lore.kernel.org/all/20230601101257.530867-1-r...@kernel.org

= Cover letter from v1 (slightly updated) =

module_alloc() is used everywhere as a means to allocate memory for code.

Besides being semantically wrong, this unnecessarily ties all subsystems
that need to allocate code, such as ftrace, kprobes and BPF to modules and
puts the burden of code allocation to the modules code.

Several architectures override module_alloc() because of various
constraints where the executable memory can be located and this causes
additional obstacles for improvements of code allocation.

A centralized infrastructure for code allocation allows allocations of
executable memory as ROX, and future optimizations such as caching large
pages for better iTLB performance and providing sub-page allocations for
users that only need small jit code snippets.

Rick Edgecombe proposed perm_alloc extension to vmalloc [1] and Song Liu
proposed execmem_alloc [2], but both these approaches were targeting BPF
allocations and lacked the ground work to abstract executable allocations
and split them from the modules core.

Thomas Gleixner suggested to express module allocation restrictions and
requirements as struct mod_alloc_type_params [3] that would define ranges,
protections and other parameters for different types of allocations used by
modules and following that suggestion Song separated allocations of
different types in modules (commit ac3b43283923 ("module: replace
module_layout with module_memory")) and posted "Type aware module
allocator" set [4].

I liked the idea of parametrising code allocation requirements as a
structure, but I believe the original proposal and Song's module allocator
was too module centric, so I came up with these patches.

This set splits code allocation from modules by introducing execmem_alloc()
and execmem_free() APIs, replaces call sites of module_alloc() and
module_memfree() with the new APIs and implements core text and related
allocations in a central place.

Instead of architecture specific overrides for module_alloc(), the
architectures that require non-default behaviour for text allocation must
fill execmem_info structure and implement execmem_arch_setup() that returns
a pointer to that structure. If an architecture does not implement
execmem_arch_setup(), the defaults compatible with the current
modules::module_alloc() are used.

Since architectures define different restrictions on placement,
permissions, alignment and other parameters for memory that can be used by
different subsystems that allocate executable memory, execmem APIs
take a type argument, that will be used to identify the calling subsystem
and to allow architectures to define parameters for ranges suitable for that
subsyste

Re: [PATCH v6 08/16] mm/execmem, arch: convert remaining overrides of module_alloc to execmem

2024-04-28 Thread Mike Rapoport
On Fri, Apr 26, 2024 at 12:01:34PM -0700, Song Liu wrote:
> On Fri, Apr 26, 2024 at 1:30 AM Mike Rapoport  wrote:
> >
> > From: "Mike Rapoport (IBM)" 
> >
> > Extend execmem parameters to accommodate more complex overrides of
> > module_alloc() by architectures.
> >
> > This includes specification of a fallback range required by arm, arm64
> > and powerpc, EXECMEM_MODULE_DATA type required by powerpc, support for
> > allocation of KASAN shadow required by s390 and x86 and support for
> > late initialization of execmem required by arm64.
> >
> > The core implementation of execmem_alloc() takes care of suppressing
> > warnings when the initial allocation fails but there is a fallback range
> > defined.
> >
> > Signed-off-by: Mike Rapoport (IBM) 
> > Acked-by: Will Deacon 
> 
> nit: We should probably move the logic for ARCH_WANTS_EXECMEM_LATE
> to a separate patch.

This would require splitting arm64 and I prefer to keep all these changes
together. 

> Otherwise,
> 
> Acked-by: Song Liu 
 
Thanks!

-- 
Sincerely yours,
Mike.


[PATCH v6 16/16] bpf: remove CONFIG_BPF_JIT dependency on CONFIG_MODULES

2024-04-26 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

BPF just-in-time compiler depended on CONFIG_MODULES because it used
module_alloc() to allocate memory for the generated code.

Since code allocations are now implemented with execmem, drop dependency of
CONFIG_BPF_JIT on CONFIG_MODULES and make it select CONFIG_EXECMEM.

Suggested-by: Björn Töpel 
Signed-off-by: Mike Rapoport (IBM) 
---
 kernel/bpf/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/bpf/Kconfig b/kernel/bpf/Kconfig
index bc25f5098a25..f999e4e0b344 100644
--- a/kernel/bpf/Kconfig
+++ b/kernel/bpf/Kconfig
@@ -43,7 +43,7 @@ config BPF_JIT
bool "Enable BPF Just In Time compiler"
depends on BPF
depends on HAVE_CBPF_JIT || HAVE_EBPF_JIT
-   depends on MODULES
+   select EXECMEM
help
  BPF programs are normally handled by a BPF interpreter. This option
  allows the kernel to generate native code when a program is loaded
-- 
2.43.0



[PATCH v6 15/16] kprobes: remove dependency on CONFIG_MODULES

2024-04-26 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

kprobes depended on CONFIG_MODULES because it has to allocate memory for
code.

Since code allocations are now implemented with execmem, kprobes can be
enabled in non-modular kernels.

Add #ifdef CONFIG_MODULES guards for the code dealing with kprobes inside
modules, make CONFIG_KPROBES select CONFIG_EXECMEM and drop the
dependency of CONFIG_KPROBES on CONFIG_MODULES.

Signed-off-by: Mike Rapoport (IBM) 
---
 arch/Kconfig|  2 +-
 include/linux/module.h  |  9 ++
 kernel/kprobes.c| 55 +++--
 kernel/trace/trace_kprobe.c | 20 +-
 4 files changed, 63 insertions(+), 23 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index 4fd0daa54e6c..caa459964f09 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -52,9 +52,9 @@ config GENERIC_ENTRY
 
 config KPROBES
bool "Kprobes"
-   depends on MODULES
depends on HAVE_KPROBES
select KALLSYMS
+   select EXECMEM
select TASKS_RCU if PREEMPTION
help
  Kprobes allows you to trap at almost any kernel address and
diff --git a/include/linux/module.h b/include/linux/module.h
index 1153b0d99a80..ffa1c603163c 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -605,6 +605,11 @@ static inline bool module_is_live(struct module *mod)
return mod->state != MODULE_STATE_GOING;
 }
 
+static inline bool module_is_coming(struct module *mod)
+{
+return mod->state == MODULE_STATE_COMING;
+}
+
 struct module *__module_text_address(unsigned long addr);
 struct module *__module_address(unsigned long addr);
 bool is_module_address(unsigned long addr);
@@ -857,6 +862,10 @@ void *dereference_module_function_descriptor(struct module 
*mod, void *ptr)
return ptr;
 }
 
+static inline bool module_is_coming(struct module *mod)
+{
+   return false;
+}
 #endif /* CONFIG_MODULES */
 
 #ifdef CONFIG_SYSFS
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index ddd7cdc16edf..ca2c6cbd42d2 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1588,7 +1588,7 @@ static int check_kprobe_address_safe(struct kprobe *p,
}
 
/* Get module refcount and reject __init functions for loaded modules. 
*/
-   if (*probed_mod) {
+   if (IS_ENABLED(CONFIG_MODULES) && *probed_mod) {
/*
 * We must hold a refcount of the probed module while updating
 * its code to prohibit unexpected unloading.
@@ -1603,12 +1603,13 @@ static int check_kprobe_address_safe(struct kprobe *p,
 * kprobes in there.
 */
if (within_module_init((unsigned long)p->addr, *probed_mod) &&
-   (*probed_mod)->state != MODULE_STATE_COMING) {
+   !module_is_coming(*probed_mod)) {
module_put(*probed_mod);
*probed_mod = NULL;
ret = -ENOENT;
}
}
+
 out:
preempt_enable();
jump_label_unlock();
@@ -2488,24 +2489,6 @@ int kprobe_add_area_blacklist(unsigned long start, 
unsigned long end)
return 0;
 }
 
-/* Remove all symbols in given area from kprobe blacklist */
-static void kprobe_remove_area_blacklist(unsigned long start, unsigned long 
end)
-{
-   struct kprobe_blacklist_entry *ent, *n;
-
-   list_for_each_entry_safe(ent, n, _blacklist, list) {
-   if (ent->start_addr < start || ent->start_addr >= end)
-   continue;
-   list_del(>list);
-   kfree(ent);
-   }
-}
-
-static void kprobe_remove_ksym_blacklist(unsigned long entry)
-{
-   kprobe_remove_area_blacklist(entry, entry + 1);
-}
-
 int __weak arch_kprobe_get_kallsym(unsigned int *symnum, unsigned long *value,
   char *type, char *sym)
 {
@@ -2570,6 +2553,25 @@ static int __init populate_kprobe_blacklist(unsigned 
long *start,
return ret ? : arch_populate_kprobe_blacklist();
 }
 
+#ifdef CONFIG_MODULES
+/* Remove all symbols in given area from kprobe blacklist */
+static void kprobe_remove_area_blacklist(unsigned long start, unsigned long 
end)
+{
+   struct kprobe_blacklist_entry *ent, *n;
+
+   list_for_each_entry_safe(ent, n, _blacklist, list) {
+   if (ent->start_addr < start || ent->start_addr >= end)
+   continue;
+   list_del(>list);
+   kfree(ent);
+   }
+}
+
+static void kprobe_remove_ksym_blacklist(unsigned long entry)
+{
+   kprobe_remove_area_blacklist(entry, entry + 1);
+}
+
 static void add_module_kprobe_blacklist(struct module *mod)
 {
unsigned long start, end;
@@ -2672,6 +2674,17 @@ static struct notifier_block kprobe_module_nb = {
.priority = 0
 };
 
+static int kprobe_register_module_notifier(void)
+{
+   return register_module_no

[PATCH v6 14/16] powerpc: use CONFIG_EXECMEM instead of CONFIG_MODULES where appropriate

2024-04-26 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

There are places where CONFIG_MODULES guards the code that depends on
memory allocation being done with module_alloc().

Replace CONFIG_MODULES with CONFIG_EXECMEM in such places.

Signed-off-by: Mike Rapoport (IBM) 
---
 arch/powerpc/Kconfig | 2 +-
 arch/powerpc/include/asm/kasan.h | 2 +-
 arch/powerpc/kernel/head_8xx.S   | 4 ++--
 arch/powerpc/kernel/head_book3s_32.S | 6 +++---
 arch/powerpc/lib/code-patching.c | 2 +-
 arch/powerpc/mm/book3s32/mmu.c   | 2 +-
 6 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 1c4be3373686..2e586733a464 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -285,7 +285,7 @@ config PPC
select IOMMU_HELPER if PPC64
select IRQ_DOMAIN
select IRQ_FORCED_THREADING
-   select KASAN_VMALLOCif KASAN && MODULES
+   select KASAN_VMALLOCif KASAN && EXECMEM
select LOCK_MM_AND_FIND_VMA
select MMU_GATHER_PAGE_SIZE
select MMU_GATHER_RCU_TABLE_FREE
diff --git a/arch/powerpc/include/asm/kasan.h b/arch/powerpc/include/asm/kasan.h
index 365d2720097c..b5bbb94c51f6 100644
--- a/arch/powerpc/include/asm/kasan.h
+++ b/arch/powerpc/include/asm/kasan.h
@@ -19,7 +19,7 @@
 
 #define KASAN_SHADOW_SCALE_SHIFT   3
 
-#if defined(CONFIG_MODULES) && defined(CONFIG_PPC32)
+#if defined(CONFIG_EXECMEM) && defined(CONFIG_PPC32)
 #define KASAN_KERN_START   ALIGN_DOWN(PAGE_OFFSET - SZ_256M, SZ_256M)
 #else
 #define KASAN_KERN_START   PAGE_OFFSET
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 647b0b445e89..edc479a7c2bc 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -199,12 +199,12 @@ instruction_counter:
mfspr   r10, SPRN_SRR0  /* Get effective address of fault */
INVALIDATE_ADJACENT_PAGES_CPU15(r10, r11)
mtspr   SPRN_MD_EPN, r10
-#ifdef CONFIG_MODULES
+#ifdef CONFIG_EXECMEM
mfcrr11
compare_to_kernel_boundary r10, r10
 #endif
mfspr   r10, SPRN_M_TWB /* Get level 1 table */
-#ifdef CONFIG_MODULES
+#ifdef CONFIG_EXECMEM
blt+3f
rlwinm  r10, r10, 0, 20, 31
orisr10, r10, (swapper_pg_dir - PAGE_OFFSET)@ha
diff --git a/arch/powerpc/kernel/head_book3s_32.S 
b/arch/powerpc/kernel/head_book3s_32.S
index c1d89764dd22..57196883a00e 100644
--- a/arch/powerpc/kernel/head_book3s_32.S
+++ b/arch/powerpc/kernel/head_book3s_32.S
@@ -419,14 +419,14 @@ InstructionTLBMiss:
  */
/* Get PTE (linux-style) and check access */
mfspr   r3,SPRN_IMISS
-#ifdef CONFIG_MODULES
+#ifdef CONFIG_EXECMEM
lis r1, TASK_SIZE@h /* check if kernel address */
cmplw   0,r1,r3
 #endif
mfspr   r2, SPRN_SDR1
li  r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC
rlwinm  r2, r2, 28, 0xf000
-#ifdef CONFIG_MODULES
+#ifdef CONFIG_EXECMEM
li  r0, 3
bgt-112f
lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha   /* if kernel address, 
use */
@@ -442,7 +442,7 @@ InstructionTLBMiss:
andc.   r1,r1,r2/* check access & ~permission */
bne-InstructionAddressInvalid /* return if access not permitted */
/* Convert linux-style PTE to low word of PPC-style PTE */
-#ifdef CONFIG_MODULES
+#ifdef CONFIG_EXECMEM
rlwimi  r2, r0, 0, 31, 31   /* userspace ? -> PP lsb */
 #endif
ori r1, r1, 0xe06   /* clear out reserved bits */
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index c6ab46156cda..7af791446ddf 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -225,7 +225,7 @@ void __init poking_init(void)
 
 static unsigned long get_patch_pfn(void *addr)
 {
-   if (IS_ENABLED(CONFIG_MODULES) && is_vmalloc_or_module_addr(addr))
+   if (IS_ENABLED(CONFIG_EXECMEM) && is_vmalloc_or_module_addr(addr))
return vmalloc_to_pfn(addr);
else
return __pa_symbol(addr) >> PAGE_SHIFT;
diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c
index 100f999871bc..625fe7d08e06 100644
--- a/arch/powerpc/mm/book3s32/mmu.c
+++ b/arch/powerpc/mm/book3s32/mmu.c
@@ -184,7 +184,7 @@ unsigned long __init mmu_mapin_ram(unsigned long base, 
unsigned long top)
 
 static bool is_module_segment(unsigned long addr)
 {
-   if (!IS_ENABLED(CONFIG_MODULES))
+   if (!IS_ENABLED(CONFIG_EXECMEM))
return false;
if (addr < ALIGN_DOWN(MODULES_VADDR, SZ_256M))
return false;
-- 
2.43.0



[PATCH v6 13/16] x86/ftrace: enable dynamic ftrace without CONFIG_MODULES

2024-04-26 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

Dynamic ftrace must allocate memory for code and this was impossible
without CONFIG_MODULES.

With execmem separated from the modules code, execmem_text_alloc() is
available regardless of CONFIG_MODULES.

Remove dependency of dynamic ftrace on CONFIG_MODULES and make
CONFIG_DYNAMIC_FTRACE select CONFIG_EXECMEM in Kconfig.

Signed-off-by: Mike Rapoport (IBM) 
---
 arch/x86/Kconfig |  1 +
 arch/x86/kernel/ftrace.c | 10 --
 2 files changed, 1 insertion(+), 10 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 4474bf32d0a4..f2917ccf4fb4 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -34,6 +34,7 @@ config X86_64
select SWIOTLB
select ARCH_HAS_ELFCORE_COMPAT
select ZONE_DMA32
+   select EXECMEM if DYNAMIC_FTRACE
 
 config FORCE_DYNAMIC_FTRACE
def_bool y
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index c8ddb7abda7c..8da0e66ca22d 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -261,8 +261,6 @@ void arch_ftrace_update_code(int command)
 /* Currently only x86_64 supports dynamic trampolines */
 #ifdef CONFIG_X86_64
 
-#ifdef CONFIG_MODULES
-/* Module allocation simplifies allocating memory for code */
 static inline void *alloc_tramp(unsigned long size)
 {
return execmem_alloc(EXECMEM_FTRACE, size);
@@ -271,14 +269,6 @@ static inline void tramp_free(void *tramp)
 {
execmem_free(tramp);
 }
-#else
-/* Trampolines can only be created if modules are supported */
-static inline void *alloc_tramp(unsigned long size)
-{
-   return NULL;
-}
-static inline void tramp_free(void *tramp) { }
-#endif
 
 /* Defined as markers to the end of the ftrace default trampolines */
 extern void ftrace_regs_caller_end(void);
-- 
2.43.0



[PATCH v6 12/16] arch: make execmem setup available regardless of CONFIG_MODULES

2024-04-26 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

execmem does not depend on modules; on the contrary, modules use
execmem.

To make execmem available when CONFIG_MODULES=n, for instance for
kprobes, split execmem_params initialization out from
arch/*/kernel/module.c and compile it when CONFIG_EXECMEM=y.

Signed-off-by: Mike Rapoport (IBM) 
---
 arch/arm/kernel/module.c   |  43 --
 arch/arm/mm/init.c |  45 +++
 arch/arm64/kernel/module.c | 140 -
 arch/arm64/mm/init.c   | 140 +
 arch/loongarch/kernel/module.c |  19 -
 arch/loongarch/mm/init.c   |  21 +
 arch/mips/kernel/module.c  |  22 --
 arch/mips/mm/init.c|  23 ++
 arch/nios2/kernel/module.c |  20 -
 arch/nios2/mm/init.c   |  21 +
 arch/parisc/kernel/module.c|  20 -
 arch/parisc/mm/init.c  |  23 +-
 arch/powerpc/kernel/module.c   |  63 ---
 arch/powerpc/mm/mem.c  |  64 +++
 arch/riscv/kernel/module.c |  44 ---
 arch/riscv/mm/init.c   |  45 +++
 arch/s390/kernel/module.c  |  27 ---
 arch/s390/mm/init.c|  30 +++
 arch/sparc/kernel/module.c |  19 -
 arch/sparc/mm/Makefile |   2 +
 arch/sparc/mm/execmem.c|  21 +
 arch/x86/kernel/module.c   |  27 ---
 arch/x86/mm/init.c |  29 +++
 23 files changed, 463 insertions(+), 445 deletions(-)
 create mode 100644 arch/sparc/mm/execmem.c

diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c
index a98fdf6ff26c..677f218f7e84 100644
--- a/arch/arm/kernel/module.c
+++ b/arch/arm/kernel/module.c
@@ -12,57 +12,14 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
-#include 
-#include 
 
 #include 
 #include 
 #include 
 #include 
 
-#ifdef CONFIG_XIP_KERNEL
-/*
- * The XIP kernel text is mapped in the module area for modules and
- * some other stuff to work without any indirect relocations.
- * MODULES_VADDR is redefined here and not in asm/memory.h to avoid
- * recompiling the whole kernel when CONFIG_XIP_KERNEL is turned on/off.
- */
-#undef MODULES_VADDR
-#define MODULES_VADDR  (((unsigned long)_exiprom + ~PMD_MASK) & PMD_MASK)
-#endif
-
-#ifdef CONFIG_MMU
-static struct execmem_info execmem_info __ro_after_init;
-
-struct execmem_info __init *execmem_arch_setup(void)
-{
-   unsigned long fallback_start = 0, fallback_end = 0;
-
-   if (IS_ENABLED(CONFIG_ARM_MODULE_PLTS)) {
-   fallback_start = VMALLOC_START;
-   fallback_end = VMALLOC_END;
-   }
-
-   execmem_info = (struct execmem_info){
-   .ranges = {
-   [EXECMEM_DEFAULT] = {
-   .start  = MODULES_VADDR,
-   .end= MODULES_END,
-   .pgprot = PAGE_KERNEL_EXEC,
-   .alignment = 1,
-   .fallback_start = fallback_start,
-   .fallback_end   = fallback_end,
-   },
-   },
-   };
-
-   return _info;
-}
-#endif
-
 bool module_init_section(const char *name)
 {
return strstarts(name, ".init") ||
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index e8c6f4be0ce1..5345d218899a 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -22,6 +22,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -486,3 +487,47 @@ void free_initrd_mem(unsigned long start, unsigned long 
end)
free_reserved_area((void *)start, (void *)end, -1, "initrd");
 }
 #endif
+
+#ifdef CONFIG_EXECMEM
+
+#ifdef CONFIG_XIP_KERNEL
+/*
+ * The XIP kernel text is mapped in the module area for modules and
+ * some other stuff to work without any indirect relocations.
+ * MODULES_VADDR is redefined here and not in asm/memory.h to avoid
+ * recompiling the whole kernel when CONFIG_XIP_KERNEL is turned on/off.
+ */
+#undef MODULES_VADDR
+#define MODULES_VADDR  (((unsigned long)_exiprom + ~PMD_MASK) & PMD_MASK)
+#endif
+
+#ifdef CONFIG_MMU
+static struct execmem_info execmem_info __ro_after_init;
+
+struct execmem_info __init *execmem_arch_setup(void)
+{
+   unsigned long fallback_start = 0, fallback_end = 0;
+
+   if (IS_ENABLED(CONFIG_ARM_MODULE_PLTS)) {
+   fallback_start = VMALLOC_START;
+   fallback_end = VMALLOC_END;
+   }
+
+   execmem_info = (struct execmem_info){
+   .ranges = {
+   [EXECMEM_DEFAULT] = {
+   .start  = MODULES_VADDR,
+   .end= MODULES_END,
+   .pgprot = PAGE_KERNEL_EXEC,
+   .alignment = 1,
+   .fallback_start = fallback_start,
+   

[PATCH v6 11/16] powerpc: extend execmem_params for kprobes allocations

2024-04-26 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

powerpc overrides kprobes::alloc_insn_page() to remove writable
permissions when STRICT_MODULE_RWX is on.

Add definition of EXECMEM_KPROBES to execmem_params to allow using the
generic kprobes::alloc_insn_page() with the desired permissions.

As powerpc uses breakpoint instructions to inject kprobes, it does not
need to constrain kprobe allocations to the modules area and can use the
entire vmalloc address space.

Signed-off-by: Mike Rapoport (IBM) 
---
 arch/powerpc/kernel/kprobes.c | 20 
 arch/powerpc/kernel/module.c  |  7 +++
 2 files changed, 7 insertions(+), 20 deletions(-)

diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index 9fcd01bb2ce6..14c5ddec3056 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -126,26 +126,6 @@ kprobe_opcode_t *arch_adjust_kprobe_addr(unsigned long 
addr, unsigned long offse
return (kprobe_opcode_t *)(addr + offset);
 }
 
-void *alloc_insn_page(void)
-{
-   void *page;
-
-   page = execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE);
-   if (!page)
-   return NULL;
-
-   if (strict_module_rwx_enabled()) {
-   int err = set_memory_rox((unsigned long)page, 1);
-
-   if (err)
-   goto error;
-   }
-   return page;
-error:
-   execmem_free(page);
-   return NULL;
-}
-
 int arch_prepare_kprobe(struct kprobe *p)
 {
int ret = 0;
diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c
index ac80559015a3..2a23cf7e141b 100644
--- a/arch/powerpc/kernel/module.c
+++ b/arch/powerpc/kernel/module.c
@@ -94,6 +94,7 @@ static struct execmem_info execmem_info __ro_after_init;
 
 struct execmem_info __init *execmem_arch_setup(void)
 {
+   pgprot_t kprobes_prot = strict_module_rwx_enabled() ? PAGE_KERNEL_ROX : 
PAGE_KERNEL_EXEC;
pgprot_t prot = strict_module_rwx_enabled() ? PAGE_KERNEL : 
PAGE_KERNEL_EXEC;
unsigned long fallback_start = 0, fallback_end = 0;
unsigned long start, end;
@@ -132,6 +133,12 @@ struct execmem_info __init *execmem_arch_setup(void)
.fallback_start = fallback_start,
.fallback_end   = fallback_end,
},
+   [EXECMEM_KPROBES] = {
+   .start  = VMALLOC_START,
+   .end= VMALLOC_END,
+   .pgprot = kprobes_prot,
+   .alignment = 1,
+   },
[EXECMEM_MODULE_DATA] = {
.start  = VMALLOC_START,
.end= VMALLOC_END,
-- 
2.43.0



[PATCH v6 10/16] arm64: extend execmem_info for generated code allocations

2024-04-26 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

The memory allocations for kprobes and BPF on arm64 can be placed
anywhere in vmalloc address space and currently this is implemented with
overrides of alloc_insn_page() and bpf_jit_alloc_exec() in arm64.

Define EXECMEM_KPROBES and EXECMEM_BPF ranges in arm64::execmem_info and
drop overrides of alloc_insn_page() and bpf_jit_alloc_exec().

Signed-off-by: Mike Rapoport (IBM) 
Acked-by: Will Deacon 
---
 arch/arm64/kernel/module.c | 12 
 arch/arm64/kernel/probes/kprobes.c |  7 ---
 arch/arm64/net/bpf_jit_comp.c  | 11 ---
 3 files changed, 12 insertions(+), 18 deletions(-)

diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index b7a7a23f9f8f..a52240ea084b 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -146,6 +146,18 @@ struct execmem_info __init *execmem_arch_setup(void)
.fallback_start = fallback_start,
.fallback_end   = fallback_end,
},
+   [EXECMEM_KPROBES] = {
+   .start  = VMALLOC_START,
+   .end= VMALLOC_END,
+   .pgprot = PAGE_KERNEL_ROX,
+   .alignment = 1,
+   },
+   [EXECMEM_BPF] = {
+   .start  = VMALLOC_START,
+   .end= VMALLOC_END,
+   .pgprot = PAGE_KERNEL,
+   .alignment = 1,
+   },
},
};
 
diff --git a/arch/arm64/kernel/probes/kprobes.c 
b/arch/arm64/kernel/probes/kprobes.c
index 327855a11df2..4268678d0e86 100644
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -129,13 +129,6 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
return 0;
 }
 
-void *alloc_insn_page(void)
-{
-   return __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, VMALLOC_END,
-   GFP_KERNEL, PAGE_KERNEL_ROX, VM_FLUSH_RESET_PERMS,
-   NUMA_NO_NODE, __builtin_return_address(0));
-}
-
 /* arm kprobe: install breakpoint in text */
 void __kprobes arch_arm_kprobe(struct kprobe *p)
 {
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 122021f9bdfc..456f5af239fc 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -1793,17 +1793,6 @@ u64 bpf_jit_alloc_exec_limit(void)
return VMALLOC_END - VMALLOC_START;
 }
 
-void *bpf_jit_alloc_exec(unsigned long size)
-{
-   /* Memory is intended to be executable, reset the pointer tag. */
-   return kasan_reset_tag(vmalloc(size));
-}
-
-void bpf_jit_free_exec(void *addr)
-{
-   return vfree(addr);
-}
-
 /* Indicate the JIT backend supports mixing bpf2bpf and tailcalls. */
 bool bpf_jit_supports_subprog_tailcalls(void)
 {
-- 
2.43.0



[PATCH v6 09/16] riscv: extend execmem_params for generated code allocations

2024-04-26 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

The memory allocations for kprobes and BPF on RISC-V are not placed in
the modules area and these custom allocations are implemented with
overrides of alloc_insn_page() and bpf_jit_alloc_exec().

Slightly reorder execmem_params initialization to support both 32 and 64
bit variants, define EXECMEM_KPROBES and EXECMEM_BPF ranges in
riscv::execmem_params and drop overrides of alloc_insn_page() and
bpf_jit_alloc_exec().

Signed-off-by: Mike Rapoport (IBM) 
Reviewed-by: Alexandre Ghiti 
---
 arch/riscv/kernel/module.c | 28 +---
 arch/riscv/kernel/probes/kprobes.c | 10 --
 arch/riscv/net/bpf_jit_core.c  | 13 -
 3 files changed, 25 insertions(+), 26 deletions(-)

diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c
index 182904127ba0..2ecbacbc9993 100644
--- a/arch/riscv/kernel/module.c
+++ b/arch/riscv/kernel/module.c
@@ -906,19 +906,41 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char 
*strtab,
return 0;
 }
 
-#if defined(CONFIG_MMU) && defined(CONFIG_64BIT)
+#ifdef CONFIG_MMU
 static struct execmem_info execmem_info __ro_after_init;
 
 struct execmem_info __init *execmem_arch_setup(void)
 {
+   unsigned long start, end;
+
+   if (IS_ENABLED(CONFIG_64BIT)) {
+   start = MODULES_VADDR;
+   end = MODULES_END;
+   } else {
+   start = VMALLOC_START;
+   end = VMALLOC_END;
+   }
+
execmem_info = (struct execmem_info){
.ranges = {
[EXECMEM_DEFAULT] = {
-   .start  = MODULES_VADDR,
-   .end= MODULES_END,
+   .start  = start,
+   .end= end,
.pgprot = PAGE_KERNEL,
.alignment = 1,
},
+   [EXECMEM_KPROBES] = {
+   .start  = VMALLOC_START,
+   .end= VMALLOC_END,
+   .pgprot = PAGE_KERNEL_READ_EXEC,
+   .alignment = 1,
+   },
+   [EXECMEM_BPF] = {
+   .start  = BPF_JIT_REGION_START,
+   .end= BPF_JIT_REGION_END,
+   .pgprot = PAGE_KERNEL,
+   .alignment = PAGE_SIZE,
+   },
},
};
 
diff --git a/arch/riscv/kernel/probes/kprobes.c 
b/arch/riscv/kernel/probes/kprobes.c
index 2f08c14a933d..e64f2f3064eb 100644
--- a/arch/riscv/kernel/probes/kprobes.c
+++ b/arch/riscv/kernel/probes/kprobes.c
@@ -104,16 +104,6 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
return 0;
 }
 
-#ifdef CONFIG_MMU
-void *alloc_insn_page(void)
-{
-   return  __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, VMALLOC_END,
-GFP_KERNEL, PAGE_KERNEL_READ_EXEC,
-VM_FLUSH_RESET_PERMS, NUMA_NO_NODE,
-__builtin_return_address(0));
-}
-#endif
-
 /* install breakpoint in text */
 void __kprobes arch_arm_kprobe(struct kprobe *p)
 {
diff --git a/arch/riscv/net/bpf_jit_core.c b/arch/riscv/net/bpf_jit_core.c
index 6b3acac30c06..e238fdbd5dbc 100644
--- a/arch/riscv/net/bpf_jit_core.c
+++ b/arch/riscv/net/bpf_jit_core.c
@@ -219,19 +219,6 @@ u64 bpf_jit_alloc_exec_limit(void)
return BPF_JIT_REGION_SIZE;
 }
 
-void *bpf_jit_alloc_exec(unsigned long size)
-{
-   return __vmalloc_node_range(size, PAGE_SIZE, BPF_JIT_REGION_START,
-   BPF_JIT_REGION_END, GFP_KERNEL,
-   PAGE_KERNEL, 0, NUMA_NO_NODE,
-   __builtin_return_address(0));
-}
-
-void bpf_jit_free_exec(void *addr)
-{
-   return vfree(addr);
-}
-
 void *bpf_arch_text_copy(void *dst, void *src, size_t len)
 {
int ret;
-- 
2.43.0



[PATCH v6 08/16] mm/execmem, arch: convert remaining overrides of module_alloc to execmem

2024-04-26 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

Extend execmem parameters to accommodate more complex overrides of
module_alloc() by architectures.

This includes specification of a fallback range required by arm, arm64
and powerpc, EXECMEM_MODULE_DATA type required by powerpc, support for
allocation of KASAN shadow required by s390 and x86 and support for
late initialization of execmem required by arm64.

The core implementation of execmem_alloc() takes care of suppressing
warnings when the initial allocation fails but there is a fallback range
defined.

Signed-off-by: Mike Rapoport (IBM) 
Acked-by: Will Deacon 
---
 arch/Kconfig |  8 
 arch/arm/kernel/module.c | 41 
 arch/arm64/Kconfig   |  1 +
 arch/arm64/kernel/module.c   | 55 ++
 arch/powerpc/kernel/module.c | 60 +++--
 arch/s390/kernel/module.c| 54 +++---
 arch/x86/kernel/module.c | 70 +++--
 include/linux/execmem.h  | 30 ++-
 include/linux/moduleloader.h | 12 --
 kernel/module/main.c | 26 +++--
 mm/execmem.c | 75 ++--
 11 files changed, 247 insertions(+), 185 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index 65afb1de48b3..4fd0daa54e6c 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -960,6 +960,14 @@ config ARCH_WANTS_MODULES_DATA_IN_VMALLOC
  For architectures like powerpc/32 which have constraints on module
  allocation and need to allocate module data outside of module area.
 
+config ARCH_WANTS_EXECMEM_LATE
+   bool
+   help
+ For architectures that do not allocate executable memory early on
+ boot, but rather require its initialization late when there is
+ enough entropy for module space randomization, for instance
+ arm64.
+
 config HAVE_IRQ_EXIT_ON_IRQ_STACK
bool
help
diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c
index e74d84f58b77..a98fdf6ff26c 100644
--- a/arch/arm/kernel/module.c
+++ b/arch/arm/kernel/module.c
@@ -16,6 +16,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -34,23 +35,31 @@
 #endif
 
 #ifdef CONFIG_MMU
-void *module_alloc(unsigned long size)
+static struct execmem_info execmem_info __ro_after_init;
+
+struct execmem_info __init *execmem_arch_setup(void)
 {
-   gfp_t gfp_mask = GFP_KERNEL;
-   void *p;
-
-   /* Silence the initial allocation */
-   if (IS_ENABLED(CONFIG_ARM_MODULE_PLTS))
-   gfp_mask |= __GFP_NOWARN;
-
-   p = __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
-   gfp_mask, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
-   __builtin_return_address(0));
-   if (!IS_ENABLED(CONFIG_ARM_MODULE_PLTS) || p)
-   return p;
-   return __vmalloc_node_range(size, 1,  VMALLOC_START, VMALLOC_END,
-   GFP_KERNEL, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
-   __builtin_return_address(0));
+   unsigned long fallback_start = 0, fallback_end = 0;
+
+   if (IS_ENABLED(CONFIG_ARM_MODULE_PLTS)) {
+   fallback_start = VMALLOC_START;
+   fallback_end = VMALLOC_END;
+   }
+
+   execmem_info = (struct execmem_info){
+   .ranges = {
+   [EXECMEM_DEFAULT] = {
+   .start  = MODULES_VADDR,
+   .end= MODULES_END,
+   .pgprot = PAGE_KERNEL_EXEC,
+   .alignment = 1,
+   .fallback_start = fallback_start,
+   .fallback_end   = fallback_end,
+   },
+   },
+   };
+
+   return _info;
 }
 #endif
 
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 7b11c98b3e84..74b34a78b7ac 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -105,6 +105,7 @@ config ARM64
select ARCH_WANT_FRAME_POINTERS
select ARCH_WANT_HUGE_PMD_SHARE if ARM64_4K_PAGES || (ARM64_16K_PAGES 
&& !ARM64_VA_BITS_36)
select ARCH_WANT_LD_ORPHAN_WARN
+   select ARCH_WANTS_EXECMEM_LATE if EXECMEM
select ARCH_WANTS_NO_INSTR
select ARCH_WANTS_THP_SWAP if ARM64_4K_PAGES
select ARCH_HAS_UBSAN
diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index e92da4da1b2a..b7a7a23f9f8f 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -20,6 +20,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -108,41 +109,47 @@ static int __init module_init_limits(void)
 
return 0;
 }
-subsys_initcall(module_init_limits);
 
-void *module_alloc(unsigned long size)
+static struct execmem_info execmem_info __ro_after_init;
+
+struct execmem_info __init *execmem_arch_setup(void)
 {
-   

[PATCH v6 07/16] mm/execmem, arch: convert simple overrides of module_alloc to execmem

2024-04-26 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

Several architectures override module_alloc() only to define address
range for code allocations different than VMALLOC address space.

Provide a generic implementation in execmem that uses the parameters for
address space ranges, required alignment and page protections provided
by architectures.

The architectures must fill execmem_info structure and implement
execmem_arch_setup() that returns a pointer to that structure. This way the
execmem initialization won't be called from every architecture, but rather
from a central place, namely a core_initcall() in execmem.

The execmem provides execmem_alloc() API that wraps __vmalloc_node_range()
with the parameters defined by the architectures.  If an architecture does
not implement execmem_arch_setup(), execmem_alloc() will fall back to
module_alloc().

Signed-off-by: Mike Rapoport (IBM) 
---
 arch/loongarch/kernel/module.c | 19 --
 arch/mips/kernel/module.c  | 20 --
 arch/nios2/kernel/module.c | 21 ---
 arch/parisc/kernel/module.c| 24 
 arch/riscv/kernel/module.c | 24 
 arch/sparc/kernel/module.c | 20 --
 include/linux/execmem.h| 47 
 mm/execmem.c   | 67 --
 mm/mm_init.c   |  2 +
 9 files changed, 210 insertions(+), 34 deletions(-)

diff --git a/arch/loongarch/kernel/module.c b/arch/loongarch/kernel/module.c
index c7d0338d12c1..ca6dd7ea1610 100644
--- a/arch/loongarch/kernel/module.c
+++ b/arch/loongarch/kernel/module.c
@@ -18,6 +18,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -490,10 +491,22 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char 
*strtab,
return 0;
 }
 
-void *module_alloc(unsigned long size)
+static struct execmem_info execmem_info __ro_after_init;
+
+struct execmem_info __init *execmem_arch_setup(void)
 {
-   return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
-   GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE, 
__builtin_return_address(0));
+   execmem_info = (struct execmem_info){
+   .ranges = {
+   [EXECMEM_DEFAULT] = {
+   .start  = MODULES_VADDR,
+   .end= MODULES_END,
+   .pgprot = PAGE_KERNEL,
+   .alignment = 1,
+   },
+   },
+   };
+
+   return _info;
 }
 
 static void module_init_ftrace_plt(const Elf_Ehdr *hdr,
diff --git a/arch/mips/kernel/module.c b/arch/mips/kernel/module.c
index 9a6c96014904..59225a3cf918 100644
--- a/arch/mips/kernel/module.c
+++ b/arch/mips/kernel/module.c
@@ -20,6 +20,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 struct mips_hi16 {
@@ -32,11 +33,22 @@ static LIST_HEAD(dbe_list);
 static DEFINE_SPINLOCK(dbe_lock);
 
 #ifdef MODULES_VADDR
-void *module_alloc(unsigned long size)
+static struct execmem_info execmem_info __ro_after_init;
+
+struct execmem_info __init *execmem_arch_setup(void)
 {
-   return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
-   GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE,
-   __builtin_return_address(0));
+   execmem_info = (struct execmem_info){
+   .ranges = {
+   [EXECMEM_DEFAULT] = {
+   .start  = MODULES_VADDR,
+   .end= MODULES_END,
+   .pgprot = PAGE_KERNEL,
+   .alignment = 1,
+   },
+   },
+   };
+
+   return _info;
 }
 #endif
 
diff --git a/arch/nios2/kernel/module.c b/arch/nios2/kernel/module.c
index 9c97b7513853..0d1ee86631fc 100644
--- a/arch/nios2/kernel/module.c
+++ b/arch/nios2/kernel/module.c
@@ -18,15 +18,26 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 
-void *module_alloc(unsigned long size)
+static struct execmem_info execmem_info __ro_after_init;
+
+struct execmem_info __init *execmem_arch_setup(void)
 {
-   return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
-   GFP_KERNEL, PAGE_KERNEL_EXEC,
-   VM_FLUSH_RESET_PERMS, NUMA_NO_NODE,
-   __builtin_return_address(0));
+   execmem_info = (struct execmem_info){
+   .ranges = {
+   [EXECMEM_DEFAULT] = {
+   .start  = MODULES_VADDR,
+   .end= MODULES_END,
+   .pgprot = PAGE_KERNEL_EXEC,
+   .alignment = 1,
+   },
+   },
+   };
+
+   return _info;
 }
 
 int apply_relocate_add(Elf32_Shdr *sechdrs, const char *strtab,
diff --git a/arch/parisc/kernel/modul

[PATCH v6 06/16] mm: introduce execmem_alloc() and execmem_free()

2024-04-26 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

module_alloc() is used everywhere as a means to allocate memory for code.

Besides being semantically wrong, this unnecessarily ties all subsystems
that need to allocate code, such as ftrace, kprobes and BPF to modules and
puts the burden of code allocation to the modules code.

Several architectures override module_alloc() because of various
constraints where the executable memory can be located and this causes
additional obstacles for improvements of code allocation.

Start splitting code allocation from modules by introducing execmem_alloc()
and execmem_free() APIs.

Initially, execmem_alloc() is a wrapper for module_alloc() and
execmem_free() is a replacement of module_memfree() to allow updating all
call sites to use the new APIs.

Since architectures define different restrictions on placement,
permissions, alignment and other parameters for memory that can be used by
different subsystems that allocate executable memory, execmem_alloc() takes
a type argument, that will be used to identify the calling subsystem and to
allow architectures to define parameters for ranges suitable for that
subsystem.

No functional changes.

Signed-off-by: Mike Rapoport (IBM) 
Acked-by: Masami Hiramatsu (Google) 
---
 arch/powerpc/kernel/kprobes.c|  6 ++--
 arch/s390/kernel/ftrace.c|  4 +--
 arch/s390/kernel/kprobes.c   |  4 +--
 arch/s390/kernel/module.c|  5 +--
 arch/sparc/net/bpf_jit_comp_32.c |  8 ++---
 arch/x86/kernel/ftrace.c |  6 ++--
 arch/x86/kernel/kprobes/core.c   |  4 +--
 include/linux/execmem.h  | 57 
 include/linux/moduleloader.h |  3 --
 kernel/bpf/core.c|  6 ++--
 kernel/kprobes.c |  8 ++---
 kernel/module/Kconfig|  1 +
 kernel/module/main.c | 25 +-
 mm/Kconfig   |  3 ++
 mm/Makefile  |  1 +
 mm/execmem.c | 32 ++
 16 files changed, 128 insertions(+), 45 deletions(-)
 create mode 100644 include/linux/execmem.h
 create mode 100644 mm/execmem.c

diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index bbca90a5e2ec..9fcd01bb2ce6 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -19,8 +19,8 @@
 #include 
 #include 
 #include 
-#include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -130,7 +130,7 @@ void *alloc_insn_page(void)
 {
void *page;
 
-   page = module_alloc(PAGE_SIZE);
+   page = execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE);
if (!page)
return NULL;
 
@@ -142,7 +142,7 @@ void *alloc_insn_page(void)
}
return page;
 error:
-   module_memfree(page);
+   execmem_free(page);
return NULL;
 }
 
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index c46381ea04ec..798249ef5646 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -7,13 +7,13 @@
  *   Author(s): Martin Schwidefsky 
  */
 
-#include 
 #include 
 #include 
 #include 
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -220,7 +220,7 @@ static int __init ftrace_plt_init(void)
 {
const char *start, *end;
 
-   ftrace_plt = module_alloc(PAGE_SIZE);
+   ftrace_plt = execmem_alloc(EXECMEM_FTRACE, PAGE_SIZE);
if (!ftrace_plt)
panic("cannot allocate ftrace plt\n");
 
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index f0cf20d4b3c5..3c1b1be744de 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -9,7 +9,6 @@
 
 #define pr_fmt(fmt) "kprobes: " fmt
 
-#include 
 #include 
 #include 
 #include 
@@ -21,6 +20,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -38,7 +38,7 @@ void *alloc_insn_page(void)
 {
void *page;
 
-   page = module_alloc(PAGE_SIZE);
+   page = execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE);
if (!page)
return NULL;
set_memory_rox((unsigned long)page, 1);
diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c
index 42215f9404af..ac97a905e8cd 100644
--- a/arch/s390/kernel/module.c
+++ b/arch/s390/kernel/module.c
@@ -21,6 +21,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -76,7 +77,7 @@ void *module_alloc(unsigned long size)
 #ifdef CONFIG_FUNCTION_TRACER
 void module_arch_cleanup(struct module *mod)
 {
-   module_memfree(mod->arch.trampolines_start);
+   execmem_free(mod->arch.trampolines_start);
 }
 #endif
 
@@ -510,7 +511,7 @@ static int module_alloc_ftrace_hotpatch_trampolines(struct 
module *me,
 
size = FTRACE_HOTPATCH_TRAMPOLINES_SIZE(s->sh_size);
numpages = DIV_ROUND_UP(size, PAGE_SIZE);
-   start = module_alloc(numpages * PAGE_SIZE);
+   start = execmem_alloc(EXECMEM_FTRACE, numpages * PAGE_SIZE);
if 

[PATCH v6 05/16] module: make module_memory_{alloc,free} more self-contained

2024-04-26 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

Move the logic related to the memory allocation and freeing into
module_memory_alloc() and module_memory_free().

Signed-off-by: Mike Rapoport (IBM) 
---
 kernel/module/main.c | 64 +++-
 1 file changed, 39 insertions(+), 25 deletions(-)

diff --git a/kernel/module/main.c b/kernel/module/main.c
index e1e8a7a9d6c1..5b82b069e0d3 100644
--- a/kernel/module/main.c
+++ b/kernel/module/main.c
@@ -1203,15 +1203,44 @@ static bool mod_mem_use_vmalloc(enum mod_mem_type type)
mod_mem_type_is_core_data(type);
 }
 
-static void *module_memory_alloc(unsigned int size, enum mod_mem_type type)
+static int module_memory_alloc(struct module *mod, enum mod_mem_type type)
 {
+   unsigned int size = PAGE_ALIGN(mod->mem[type].size);
+   void *ptr;
+
+   mod->mem[type].size = size;
+
if (mod_mem_use_vmalloc(type))
-   return vzalloc(size);
-   return module_alloc(size);
+   ptr = vmalloc(size);
+   else
+   ptr = module_alloc(size);
+
+   if (!ptr)
+   return -ENOMEM;
+
+   /*
+* The pointer to these blocks of memory are stored on the module
+* structure and we keep that around so long as the module is
+* around. We only free that memory when we unload the module.
+* Just mark them as not being a leak then. The .init* ELF
+* sections *do* get freed after boot so we *could* treat them
+* slightly differently with kmemleak_ignore() and only grey
+* them out as they work as typical memory allocations which
+* *do* eventually get freed, but let's just keep things simple
+* and avoid *any* false positives.
+*/
+   kmemleak_not_leak(ptr);
+
+   memset(ptr, 0, size);
+   mod->mem[type].base = ptr;
+
+   return 0;
 }
 
-static void module_memory_free(void *ptr, enum mod_mem_type type)
+static void module_memory_free(struct module *mod, enum mod_mem_type type)
 {
+   void *ptr = mod->mem[type].base;
+
if (mod_mem_use_vmalloc(type))
vfree(ptr);
else
@@ -1229,12 +1258,12 @@ static void free_mod_mem(struct module *mod)
/* Free lock-classes; relies on the preceding sync_rcu(). */
lockdep_free_key_range(mod_mem->base, mod_mem->size);
if (mod_mem->size)
-   module_memory_free(mod_mem->base, type);
+   module_memory_free(mod, type);
}
 
/* MOD_DATA hosts mod, so free it at last */
lockdep_free_key_range(mod->mem[MOD_DATA].base, 
mod->mem[MOD_DATA].size);
-   module_memory_free(mod->mem[MOD_DATA].base, MOD_DATA);
+   module_memory_free(mod, MOD_DATA);
 }
 
 /* Free a module, remove from lists, etc. */
@@ -2225,7 +2254,6 @@ static int find_module_sections(struct module *mod, 
struct load_info *info)
 static int move_module(struct module *mod, struct load_info *info)
 {
int i;
-   void *ptr;
enum mod_mem_type t = 0;
int ret = -ENOMEM;
 
@@ -2234,26 +2262,12 @@ static int move_module(struct module *mod, struct 
load_info *info)
mod->mem[type].base = NULL;
continue;
}
-   mod->mem[type].size = PAGE_ALIGN(mod->mem[type].size);
-   ptr = module_memory_alloc(mod->mem[type].size, type);
-   /*
- * The pointer to these blocks of memory are stored on the 
module
- * structure and we keep that around so long as the module is
- * around. We only free that memory when we unload the module.
- * Just mark them as not being a leak then. The .init* ELF
- * sections *do* get freed after boot so we *could* treat them
- * slightly differently with kmemleak_ignore() and only grey
- * them out as they work as typical memory allocations which
- * *do* eventually get freed, but let's just keep things simple
- * and avoid *any* false positives.
-*/
-   kmemleak_not_leak(ptr);
-   if (!ptr) {
+
+   ret = module_memory_alloc(mod, type);
+   if (ret) {
t = type;
goto out_enomem;
}
-   memset(ptr, 0, mod->mem[type].size);
-   mod->mem[type].base = ptr;
}
 
/* Transfer each section which specifies SHF_ALLOC */
@@ -2296,7 +2310,7 @@ static int move_module(struct module *mod, struct 
load_info *info)
return 0;
 out_enomem:
for (t--; t >= 0; t--)
-   module_memory_free(mod->mem[t].base, t);
+   module_memory_free(mod, t);
return ret;
 }
 
-- 
2.43.0



[PATCH v6 04/16] sparc: simplify module_alloc()

2024-04-26 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

Define MODULES_VADDR and MODULES_END as VMALLOC_START and VMALLOC_END
for 32-bit and reduce module_alloc() to

__vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, ...)

as with the new defines the allocations become identical for both 32
and 64 bits.

While at it, drop the unused include of <linux/jump_label.h>.

Suggested-by: Sam Ravnborg 
Signed-off-by: Mike Rapoport (IBM) 
---
 arch/sparc/include/asm/pgtable_32.h |  2 ++
 arch/sparc/kernel/module.c  | 25 +
 2 files changed, 3 insertions(+), 24 deletions(-)

diff --git a/arch/sparc/include/asm/pgtable_32.h 
b/arch/sparc/include/asm/pgtable_32.h
index 9e85d57ac3f2..62bcafe38b1f 100644
--- a/arch/sparc/include/asm/pgtable_32.h
+++ b/arch/sparc/include/asm/pgtable_32.h
@@ -432,6 +432,8 @@ static inline int io_remap_pfn_range(struct vm_area_struct 
*vma,
 
 #define VMALLOC_START   _AC(0xfe60,UL)
 #define VMALLOC_END _AC(0xffc0,UL)
+#define MODULES_VADDR   VMALLOC_START
+#define MODULES_END VMALLOC_END
 
 /* We provide our own get_unmapped_area to cope with VA holes for userland */
 #define HAVE_ARCH_UNMAPPED_AREA
diff --git a/arch/sparc/kernel/module.c b/arch/sparc/kernel/module.c
index 66c45a2764bc..d37adb2a0b54 100644
--- a/arch/sparc/kernel/module.c
+++ b/arch/sparc/kernel/module.c
@@ -21,35 +21,12 @@
 
 #include "entry.h"
 
-#ifdef CONFIG_SPARC64
-
-#include 
-
-static void *module_map(unsigned long size)
+void *module_alloc(unsigned long size)
 {
-   if (PAGE_ALIGN(size) > MODULES_LEN)
-   return NULL;
return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE,
__builtin_return_address(0));
 }
-#else
-static void *module_map(unsigned long size)
-{
-   return vmalloc(size);
-}
-#endif /* CONFIG_SPARC64 */
-
-void *module_alloc(unsigned long size)
-{
-   void *ret;
-
-   ret = module_map(size);
-   if (ret)
-   memset(ret, 0, size);
-
-   return ret;
-}
 
 /* Make generic code ignore STT_REGISTER dummy undefined symbols.  */
 int module_frob_arch_sections(Elf_Ehdr *hdr,
-- 
2.43.0



[PATCH v6 03/16] nios2: define virtual address space for modules

2024-04-26 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

nios2 uses kmalloc() to implement module_alloc() because CALL26/PCREL26
cannot reach all of vmalloc address space.

Define module space as 32MiB below the kernel base and switch nios2 to
use vmalloc for module allocations.

Suggested-by: Thomas Gleixner 
Acked-by: Dinh Nguyen 
Acked-by: Song Liu 
Signed-off-by: Mike Rapoport (IBM) 
---
 arch/nios2/include/asm/pgtable.h |  5 -
 arch/nios2/kernel/module.c   | 19 ---
 2 files changed, 8 insertions(+), 16 deletions(-)

diff --git a/arch/nios2/include/asm/pgtable.h b/arch/nios2/include/asm/pgtable.h
index d052dfcbe8d3..eab87c6beacb 100644
--- a/arch/nios2/include/asm/pgtable.h
+++ b/arch/nios2/include/asm/pgtable.h
@@ -25,7 +25,10 @@
 #include 
 
 #define VMALLOC_START  CONFIG_NIOS2_KERNEL_MMU_REGION_BASE
-#define VMALLOC_END(CONFIG_NIOS2_KERNEL_REGION_BASE - 1)
+#define VMALLOC_END(CONFIG_NIOS2_KERNEL_REGION_BASE - SZ_32M - 1)
+
+#define MODULES_VADDR  (CONFIG_NIOS2_KERNEL_REGION_BASE - SZ_32M)
+#define MODULES_END(CONFIG_NIOS2_KERNEL_REGION_BASE - 1)
 
 struct mm_struct;
 
diff --git a/arch/nios2/kernel/module.c b/arch/nios2/kernel/module.c
index 76e0a42d6e36..9c97b7513853 100644
--- a/arch/nios2/kernel/module.c
+++ b/arch/nios2/kernel/module.c
@@ -21,23 +21,12 @@
 
 #include 
 
-/*
- * Modules should NOT be allocated with kmalloc for (obvious) reasons.
- * But we do it for now to avoid relocation issues. CALL26/PCREL26 cannot reach
- * from 0x8000 (vmalloc area) to 0xc (kernel) (kmalloc returns
- * addresses in 0xc000)
- */
 void *module_alloc(unsigned long size)
 {
-   if (size == 0)
-   return NULL;
-   return kmalloc(size, GFP_KERNEL);
-}
-
-/* Free memory returned from module_alloc */
-void module_memfree(void *module_region)
-{
-   kfree(module_region);
+   return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
+   GFP_KERNEL, PAGE_KERNEL_EXEC,
+   VM_FLUSH_RESET_PERMS, NUMA_NO_NODE,
+   __builtin_return_address(0));
 }
 
 int apply_relocate_add(Elf32_Shdr *sechdrs, const char *strtab,
-- 
2.43.0



[PATCH v6 02/16] mips: module: rename MODULE_START to MODULES_VADDR

2024-04-26 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

and MODULE_END to MODULES_END to match other architectures that define
custom address space for modules.

Signed-off-by: Mike Rapoport (IBM) 
---
 arch/mips/include/asm/pgtable-64.h | 4 ++--
 arch/mips/kernel/module.c  | 4 ++--
 arch/mips/mm/fault.c   | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/mips/include/asm/pgtable-64.h 
b/arch/mips/include/asm/pgtable-64.h
index 20ca48c1b606..c0109aff223b 100644
--- a/arch/mips/include/asm/pgtable-64.h
+++ b/arch/mips/include/asm/pgtable-64.h
@@ -147,8 +147,8 @@
 #if defined(CONFIG_MODULES) && defined(KBUILD_64BIT_SYM32) && \
VMALLOC_START != CKSSEG
 /* Load modules into 32bit-compatible segment. */
-#define MODULE_START   CKSSEG
-#define MODULE_END (FIXADDR_START-2*PAGE_SIZE)
+#define MODULES_VADDR  CKSSEG
+#define MODULES_END(FIXADDR_START-2*PAGE_SIZE)
 #endif
 
 #define pte_ERROR(e) \
diff --git a/arch/mips/kernel/module.c b/arch/mips/kernel/module.c
index 7b2fbaa9cac5..9a6c96014904 100644
--- a/arch/mips/kernel/module.c
+++ b/arch/mips/kernel/module.c
@@ -31,10 +31,10 @@ struct mips_hi16 {
 static LIST_HEAD(dbe_list);
 static DEFINE_SPINLOCK(dbe_lock);
 
-#ifdef MODULE_START
+#ifdef MODULES_VADDR
 void *module_alloc(unsigned long size)
 {
-   return __vmalloc_node_range(size, 1, MODULE_START, MODULE_END,
+   return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE,
__builtin_return_address(0));
 }
diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c
index aaa9a242ebba..37fedeaca2e9 100644
--- a/arch/mips/mm/fault.c
+++ b/arch/mips/mm/fault.c
@@ -83,8 +83,8 @@ static void __do_page_fault(struct pt_regs *regs, unsigned 
long write,
 
if (unlikely(address >= VMALLOC_START && address <= VMALLOC_END))
goto VMALLOC_FAULT_TARGET;
-#ifdef MODULE_START
-   if (unlikely(address >= MODULE_START && address < MODULE_END))
+#ifdef MODULES_VADDR
+   if (unlikely(address >= MODULES_VADDR && address < MODULES_END))
goto VMALLOC_FAULT_TARGET;
 #endif
 
-- 
2.43.0



[PATCH v6 01/16] arm64: module: remove unneeded call to kasan_alloc_module_shadow()

2024-04-26 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

Since commit f6f37d9320a1 ("arm64: select KASAN_VMALLOC for SW/HW_TAGS
modes") KASAN_VMALLOC is always enabled when KASAN is on. This means
that allocations in module_alloc() will be tracked by KASAN protection
for vmalloc() and that kasan_alloc_module_shadow() will always be an
empty inline and there is no point in calling it.

Drop meaningless call to kasan_alloc_module_shadow() from
module_alloc().

Signed-off-by: Mike Rapoport (IBM) 
---
 arch/arm64/kernel/module.c | 5 -
 1 file changed, 5 deletions(-)

diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index 47e0be610bb6..e92da4da1b2a 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -141,11 +141,6 @@ void *module_alloc(unsigned long size)
__func__);
}
 
-   if (p && (kasan_alloc_module_shadow(p, size, GFP_KERNEL) < 0)) {
-   vfree(p);
-   return NULL;
-   }
-
/* Memory is intended to be executable, reset the pointer tag. */
return kasan_reset_tag(p);
 }
-- 
2.43.0



[PATCH v6 00/16] mm: jit/text allocator

2024-04-26 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

Hi,

The patches are also available in git:
https://git.kernel.org/pub/scm/linux/kernel/git/rppt/linux.git/log/?h=execmem/v6

v6 changes:
* restore patch "arm64: extend execmem_info for generated code
  allocations" that disappeared in v5 rebase
* update execmem initialization so that by default it will be
  initialized early while late initialization will be an opt-in

v5: https://lore.kernel.org/all/20240422094436.3625171-1-r...@kernel.org
* rebase on v6.9-rc4 to avoid a conflict in kprobes
* add copyrights to mm/execmem.c (Luis)
* fix spelling (Ingo)
* define MODULES_VADDR for sparc (Sam)
* consistently initialize struct execmem_info (Peter)
* reduce #ifdefs in function bodies in kprobes (Masami) 

v4: https://lore.kernel.org/all/20240411160051.2093261-1-r...@kernel.org
* rebase on v6.9-rc2
* rename execmem_params to execmem_info and execmem_arch_params() to
  execmem_arch_setup()
* use single execmem_alloc() API instead of execmem_{text,data}_alloc() (Song)
* avoid extra copy of execmem parameters (Rick)
* run execmem_init() as core_initcall() except for the architectures that
  may allocate text really early (currently only x86) (Will)
* add acks for some of arm64 and riscv changes, thanks Will and Alexandre
* new commits:
  - drop call to kasan_alloc_module_shadow() on arm64 because it's not
needed anymore
  - rename MODULE_START to MODULES_VADDR on MIPS
  - use CONFIG_EXECMEM instead of CONFIG_MODULES on powerpc as per Christophe:
https://lore.kernel.org/all/79062fa3-3402-47b3-8920-9231ad05e...@csgroup.eu/

v3: https://lore.kernel.org/all/20230918072955.2507221-1-r...@kernel.org
* add type parameter to execmem allocation APIs
* remove BPF dependency on modules

v2: https://lore.kernel.org/all/20230616085038.4121892-1-r...@kernel.org
* Separate "module" and "others" allocations with execmem_text_alloc()
and jit_text_alloc()
* Drop ROX entailment on x86
* Add ack for nios2 changes, thanks Dinh Nguyen

v1: https://lore.kernel.org/all/20230601101257.530867-1-r...@kernel.org

= Cover letter from v1 (slightly updated) =

module_alloc() is used everywhere as a means to allocate memory for code.

Besides being semantically wrong, this unnecessarily ties all subsystems
that need to allocate code, such as ftrace, kprobes and BPF, to modules and
puts the burden of code allocation on the modules code.

Several architectures override module_alloc() because of various
constraints where the executable memory can be located and this causes
additional obstacles for improvements of code allocation.

A centralized infrastructure for code allocation allows allocations of
executable memory as ROX, and future optimizations such as caching large
pages for better iTLB performance and providing sub-page allocations for
users that only need small jit code snippets.

Rick Edgecombe proposed perm_alloc extension to vmalloc [1] and Song Liu
proposed execmem_alloc [2], but both these approaches were targeting BPF
allocations and lacked the ground work to abstract executable allocations
and split them from the modules core.

Thomas Gleixner suggested to express module allocation restrictions and
requirements as struct mod_alloc_type_params [3] that would define ranges,
protections and other parameters for different types of allocations used by
modules and following that suggestion Song separated allocations of
different types in modules (commit ac3b43283923 ("module: replace
module_layout with module_memory")) and posted "Type aware module
allocator" set [4].

I liked the idea of parametrising code allocation requirements as a
structure, but I believe the original proposal and Song's module allocator
were too module centric, so I came up with these patches.

This set splits code allocation from modules by introducing the
execmem_alloc() and execmem_free() APIs, replaces call sites of module_alloc()
and module_memfree() with the new APIs and implements core text and related
allocations in a central place.

Instead of architecture specific overrides for module_alloc(), the
architectures that require non-default behaviour for text allocation must
fill execmem_info structure and implement execmem_arch_setup() that returns
a pointer to that structure. If an architecture does not implement
execmem_arch_setup(), the defaults compatible with the current
modules::module_alloc() are used.

Since architectures define different restrictions on placement,
permissions, alignment and other parameters for memory that can be used by
different subsystems that allocate executable memory, execmem APIs
take a type argument, that will be used to identify the calling subsystem
and to allow architectures to define parameters for ranges suitable for that
subsystem.

The new infrastructure allows decoupling of BPF, kprobes and ftrace from
modules, and most importantly it paves the way for ROX allocations for
executable memory.

[1] 
https://lore.kernel.or

Re: linux-next: boot failure after merge of the modules tree

2024-04-24 Thread Mike Rapoport
On Wed, Apr 24, 2024 at 06:35:03PM +1000, Stephen Rothwell wrote:
> Hi all,
> 
> After merging the modules tree, today's linux-next boot (powerpc
> pseries_le_defconfig) failed like this:
> 
> BUG: Kernel NULL pointer dereference at 0x0030
> Faulting instruction address: 0xc057a4ec
> Oops: Kernel access of bad area, sig: 11 [#1]
> LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries
> Modules linked in:
> CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.9.0-rc5-08179-ga5ea707d10dc #1
> Hardware name: IBM pSeries (emulated by qemu) POWER8 (raw) 0x4d0200 0xf04 
> of:SLOF,HEAD pSeries
> NIP:  c057a4ec LR: c02cd32c CTR: c02cd304
> REGS: c4997700 TRAP: 0380   Not tainted  
> (6.9.0-rc5-08179-ga5ea707d10dc)
> MSR:  82009033   CR: 84002484  XER: 2000
> CFAR: c02cd328 IRQMASK: 0 
> GPR00: c02cd32c c49979a0 c163a500 0001 
> GPR04: 0001 4000  2cc0 
> GPR08: 0030 0100  2000 
> GPR12: c02cd304 c2b7 c00c  
> GPR16:     
> GPR20:     
> GPR24:   c2aa0940 c26c0a40 
> GPR28: 0001 c02cd32c 0030 c27d0f78 
> NIP [c057a4ec] execmem_alloc+0x5c/0x12c
> LR [c02cd32c] alloc_insn_page+0x28/0x70
> Call Trace:
> [c4997a40] [c02cd32c] alloc_insn_page+0x28/0x70
> [c4997a60] [c02d07a4] __get_insn_slot+0x1cc/0x29c
> [c4997aa0] [c005c434] arch_prepare_kprobe+0xbc/0x31c
> [c4997b20] [c02d1b40] register_kprobe+0x54c/0x878
> [c4997b90] [c2018828] arch_init_kprobes+0x28/0x40
> [c4997bb0] [c204b33c] init_kprobes+0x138/0x218
> [c4997c30] [c0010da8] do_one_initcall+0x80/0x2f8
> [c4997d00] [c2005aa8] kernel_init_freeable+0x1f8/0x520
> [c4997de0] [c0011148] kernel_init+0x34/0x26c
> [c4997e50] [c000debc] ret_from_kernel_user_thread+0x14/0x1c
> --- interrupt: 0 at 0x0
> Code: fbe1fff8 3940 38e02cc0 7c9c2378 7fa802a6 e8c91e48 f8010010 fb41ffd0 
> 39200100 fb61ffd8 f821ff61 7fc64214 <7ca6402a> eb5e0020 837e0028 e8de0008 
> ---[ end trace  ]---
> 
> note: swapper/0[1] exited with irqs disabled
> Kernel panic - not syncing: Attempted to kill init! exitcode=0x000b
> 
> Bisected to commit
> 
>   18da532eefc8 ("mm/execmem, arch: convert remaining overrides of 
> module_alloc to execmem")
> 
> I have used the modules tree from next-20240423 for today.
> 
> This is a qemu boot test using:
> 
> qemu-system-ppc64 -M pseries -cpu POWER8 -m 2G -vga none -nographic -kernel 
> ~/next/powerpc_pseries_le_defconfig/vmlinux -initrd ./ppc64le-rootfs.cpio.gz

This should fix it for now, I'll rework initialization a bit in v6
 
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 1c4be3373686..bea33bf538e9 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -176,6 +176,7 @@ config PPC
select ARCH_WANT_IRQS_OFF_ACTIVATE_MM
select ARCH_WANT_LD_ORPHAN_WARN
select ARCH_WANT_OPTIMIZE_DAX_VMEMMAP   if PPC_RADIX_MMU
+   select ARCH_WANTS_EXECMEM_EARLY if EXECMEM
select ARCH_WANTS_MODULES_DATA_IN_VMALLOC   if PPC_BOOK3S_32 || 
PPC_8xx
select ARCH_WEAK_RELEASE_ACQUIRE
select BINFMT_ELF


> -- 
> Cheers,
> Stephen Rothwell

-- 
Sincerely yours,
Mike.


[PATCH v5 15/15] bpf: remove CONFIG_BPF_JIT dependency on CONFIG_MODULES of

2024-04-22 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

BPF just-in-time compiler depended on CONFIG_MODULES because it used
module_alloc() to allocate memory for the generated code.

Since code allocations are now implemented with execmem, drop dependency of
CONFIG_BPF_JIT on CONFIG_MODULES and make it select CONFIG_EXECMEM.

Suggested-by: Björn Töpel 
Signed-off-by: Mike Rapoport (IBM) 
---
 kernel/bpf/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/bpf/Kconfig b/kernel/bpf/Kconfig
index bc25f5098a25..f999e4e0b344 100644
--- a/kernel/bpf/Kconfig
+++ b/kernel/bpf/Kconfig
@@ -43,7 +43,7 @@ config BPF_JIT
bool "Enable BPF Just In Time compiler"
depends on BPF
depends on HAVE_CBPF_JIT || HAVE_EBPF_JIT
-   depends on MODULES
+   select EXECMEM
help
  BPF programs are normally handled by a BPF interpreter. This option
  allows the kernel to generate native code when a program is loaded
-- 
2.43.0



[PATCH v5 14/15] kprobes: remove dependency on CONFIG_MODULES

2024-04-22 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

kprobes depended on CONFIG_MODULES because it has to allocate memory for
code.

Since code allocations are now implemented with execmem, kprobes can be
enabled in non-modular kernels.

Add #ifdef CONFIG_MODULES guards for the code dealing with kprobes inside
modules, make CONFIG_KPROBES select CONFIG_EXECMEM and drop the
dependency of CONFIG_KPROBES on CONFIG_MODULES.

Signed-off-by: Mike Rapoport (IBM) 
---
 arch/Kconfig|  2 +-
 include/linux/module.h  |  9 ++
 kernel/kprobes.c| 55 +++--
 kernel/trace/trace_kprobe.c | 20 +-
 4 files changed, 63 insertions(+), 23 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index 7006f71f0110..a48ce6a488b3 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -52,9 +52,9 @@ config GENERIC_ENTRY
 
 config KPROBES
bool "Kprobes"
-   depends on MODULES
depends on HAVE_KPROBES
select KALLSYMS
+   select EXECMEM
select TASKS_RCU if PREEMPTION
help
  Kprobes allows you to trap at almost any kernel address and
diff --git a/include/linux/module.h b/include/linux/module.h
index 1153b0d99a80..ffa1c603163c 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -605,6 +605,11 @@ static inline bool module_is_live(struct module *mod)
return mod->state != MODULE_STATE_GOING;
 }
 
+static inline bool module_is_coming(struct module *mod)
+{
+return mod->state == MODULE_STATE_COMING;
+}
+
 struct module *__module_text_address(unsigned long addr);
 struct module *__module_address(unsigned long addr);
 bool is_module_address(unsigned long addr);
@@ -857,6 +862,10 @@ void *dereference_module_function_descriptor(struct module 
*mod, void *ptr)
return ptr;
 }
 
+static inline bool module_is_coming(struct module *mod)
+{
+   return false;
+}
 #endif /* CONFIG_MODULES */
 
 #ifdef CONFIG_SYSFS
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index ddd7cdc16edf..ca2c6cbd42d2 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1588,7 +1588,7 @@ static int check_kprobe_address_safe(struct kprobe *p,
}
 
/* Get module refcount and reject __init functions for loaded modules. 
*/
-   if (*probed_mod) {
+   if (IS_ENABLED(CONFIG_MODULES) && *probed_mod) {
/*
 * We must hold a refcount of the probed module while updating
 * its code to prohibit unexpected unloading.
@@ -1603,12 +1603,13 @@ static int check_kprobe_address_safe(struct kprobe *p,
 * kprobes in there.
 */
if (within_module_init((unsigned long)p->addr, *probed_mod) &&
-   (*probed_mod)->state != MODULE_STATE_COMING) {
+   !module_is_coming(*probed_mod)) {
module_put(*probed_mod);
*probed_mod = NULL;
ret = -ENOENT;
}
}
+
 out:
preempt_enable();
jump_label_unlock();
@@ -2488,24 +2489,6 @@ int kprobe_add_area_blacklist(unsigned long start, 
unsigned long end)
return 0;
 }
 
-/* Remove all symbols in given area from kprobe blacklist */
-static void kprobe_remove_area_blacklist(unsigned long start, unsigned long 
end)
-{
-   struct kprobe_blacklist_entry *ent, *n;
-
-   list_for_each_entry_safe(ent, n, _blacklist, list) {
-   if (ent->start_addr < start || ent->start_addr >= end)
-   continue;
-   list_del(>list);
-   kfree(ent);
-   }
-}
-
-static void kprobe_remove_ksym_blacklist(unsigned long entry)
-{
-   kprobe_remove_area_blacklist(entry, entry + 1);
-}
-
 int __weak arch_kprobe_get_kallsym(unsigned int *symnum, unsigned long *value,
   char *type, char *sym)
 {
@@ -2570,6 +2553,25 @@ static int __init populate_kprobe_blacklist(unsigned 
long *start,
return ret ? : arch_populate_kprobe_blacklist();
 }
 
+#ifdef CONFIG_MODULES
+/* Remove all symbols in given area from kprobe blacklist */
+static void kprobe_remove_area_blacklist(unsigned long start, unsigned long 
end)
+{
+   struct kprobe_blacklist_entry *ent, *n;
+
+   list_for_each_entry_safe(ent, n, _blacklist, list) {
+   if (ent->start_addr < start || ent->start_addr >= end)
+   continue;
+   list_del(>list);
+   kfree(ent);
+   }
+}
+
+static void kprobe_remove_ksym_blacklist(unsigned long entry)
+{
+   kprobe_remove_area_blacklist(entry, entry + 1);
+}
+
 static void add_module_kprobe_blacklist(struct module *mod)
 {
unsigned long start, end;
@@ -2672,6 +2674,17 @@ static struct notifier_block kprobe_module_nb = {
.priority = 0
 };
 
+static int kprobe_register_module_notifier(void)
+{
+   return register_module_no

[PATCH v5 13/15] powerpc: use CONFIG_EXECMEM instead of CONFIG_MODULES where appropriate

2024-04-22 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

There are places where CONFIG_MODULES guards the code that depends on
memory allocation being done with module_alloc().

Replace CONFIG_MODULES with CONFIG_EXECMEM in such places.

Signed-off-by: Mike Rapoport (IBM) 
---
 arch/powerpc/Kconfig | 2 +-
 arch/powerpc/include/asm/kasan.h | 2 +-
 arch/powerpc/kernel/head_8xx.S   | 4 ++--
 arch/powerpc/kernel/head_book3s_32.S | 6 +++---
 arch/powerpc/lib/code-patching.c | 2 +-
 arch/powerpc/mm/book3s32/mmu.c   | 2 +-
 6 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 1c4be3373686..2e586733a464 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -285,7 +285,7 @@ config PPC
select IOMMU_HELPER if PPC64
select IRQ_DOMAIN
select IRQ_FORCED_THREADING
-   select KASAN_VMALLOCif KASAN && MODULES
+   select KASAN_VMALLOCif KASAN && EXECMEM
select LOCK_MM_AND_FIND_VMA
select MMU_GATHER_PAGE_SIZE
select MMU_GATHER_RCU_TABLE_FREE
diff --git a/arch/powerpc/include/asm/kasan.h b/arch/powerpc/include/asm/kasan.h
index 365d2720097c..b5bbb94c51f6 100644
--- a/arch/powerpc/include/asm/kasan.h
+++ b/arch/powerpc/include/asm/kasan.h
@@ -19,7 +19,7 @@
 
 #define KASAN_SHADOW_SCALE_SHIFT   3
 
-#if defined(CONFIG_MODULES) && defined(CONFIG_PPC32)
+#if defined(CONFIG_EXECMEM) && defined(CONFIG_PPC32)
 #define KASAN_KERN_START   ALIGN_DOWN(PAGE_OFFSET - SZ_256M, SZ_256M)
 #else
 #define KASAN_KERN_START   PAGE_OFFSET
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 647b0b445e89..edc479a7c2bc 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -199,12 +199,12 @@ instruction_counter:
mfspr   r10, SPRN_SRR0  /* Get effective address of fault */
INVALIDATE_ADJACENT_PAGES_CPU15(r10, r11)
mtspr   SPRN_MD_EPN, r10
-#ifdef CONFIG_MODULES
+#ifdef CONFIG_EXECMEM
mfcrr11
compare_to_kernel_boundary r10, r10
 #endif
mfspr   r10, SPRN_M_TWB /* Get level 1 table */
-#ifdef CONFIG_MODULES
+#ifdef CONFIG_EXECMEM
blt+3f
rlwinm  r10, r10, 0, 20, 31
orisr10, r10, (swapper_pg_dir - PAGE_OFFSET)@ha
diff --git a/arch/powerpc/kernel/head_book3s_32.S 
b/arch/powerpc/kernel/head_book3s_32.S
index c1d89764dd22..57196883a00e 100644
--- a/arch/powerpc/kernel/head_book3s_32.S
+++ b/arch/powerpc/kernel/head_book3s_32.S
@@ -419,14 +419,14 @@ InstructionTLBMiss:
  */
/* Get PTE (linux-style) and check access */
mfspr   r3,SPRN_IMISS
-#ifdef CONFIG_MODULES
+#ifdef CONFIG_EXECMEM
lis r1, TASK_SIZE@h /* check if kernel address */
cmplw   0,r1,r3
 #endif
mfspr   r2, SPRN_SDR1
li  r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC
rlwinm  r2, r2, 28, 0xf000
-#ifdef CONFIG_MODULES
+#ifdef CONFIG_EXECMEM
li  r0, 3
bgt-112f
lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha   /* if kernel address, 
use */
@@ -442,7 +442,7 @@ InstructionTLBMiss:
andc.   r1,r1,r2/* check access & ~permission */
bne-InstructionAddressInvalid /* return if access not permitted */
/* Convert linux-style PTE to low word of PPC-style PTE */
-#ifdef CONFIG_MODULES
+#ifdef CONFIG_EXECMEM
rlwimi  r2, r0, 0, 31, 31   /* userspace ? -> PP lsb */
 #endif
ori r1, r1, 0xe06   /* clear out reserved bits */
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index c6ab46156cda..7af791446ddf 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -225,7 +225,7 @@ void __init poking_init(void)
 
 static unsigned long get_patch_pfn(void *addr)
 {
-   if (IS_ENABLED(CONFIG_MODULES) && is_vmalloc_or_module_addr(addr))
+   if (IS_ENABLED(CONFIG_EXECMEM) && is_vmalloc_or_module_addr(addr))
return vmalloc_to_pfn(addr);
else
return __pa_symbol(addr) >> PAGE_SHIFT;
diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c
index 100f999871bc..625fe7d08e06 100644
--- a/arch/powerpc/mm/book3s32/mmu.c
+++ b/arch/powerpc/mm/book3s32/mmu.c
@@ -184,7 +184,7 @@ unsigned long __init mmu_mapin_ram(unsigned long base, 
unsigned long top)
 
 static bool is_module_segment(unsigned long addr)
 {
-   if (!IS_ENABLED(CONFIG_MODULES))
+   if (!IS_ENABLED(CONFIG_EXECMEM))
return false;
if (addr < ALIGN_DOWN(MODULES_VADDR, SZ_256M))
return false;
-- 
2.43.0



[PATCH v5 12/15] x86/ftrace: enable dynamic ftrace without CONFIG_MODULES

2024-04-22 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

Dynamic ftrace must allocate memory for code and this was impossible
without CONFIG_MODULES.

With execmem separated from the modules code, execmem_alloc() is
available regardless of CONFIG_MODULES.

Remove dependency of dynamic ftrace on CONFIG_MODULES and make
CONFIG_DYNAMIC_FTRACE select CONFIG_EXECMEM in Kconfig.

Signed-off-by: Mike Rapoport (IBM) 
---
 arch/x86/Kconfig |  1 +
 arch/x86/kernel/ftrace.c | 10 --
 2 files changed, 1 insertion(+), 10 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 3f5ba72c9480..cd8addb96a0b 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -34,6 +34,7 @@ config X86_64
select SWIOTLB
select ARCH_HAS_ELFCORE_COMPAT
select ZONE_DMA32
+   select EXECMEM if DYNAMIC_FTRACE
 
 config FORCE_DYNAMIC_FTRACE
def_bool y
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index c8ddb7abda7c..8da0e66ca22d 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -261,8 +261,6 @@ void arch_ftrace_update_code(int command)
 /* Currently only x86_64 supports dynamic trampolines */
 #ifdef CONFIG_X86_64
 
-#ifdef CONFIG_MODULES
-/* Module allocation simplifies allocating memory for code */
 static inline void *alloc_tramp(unsigned long size)
 {
return execmem_alloc(EXECMEM_FTRACE, size);
@@ -271,14 +269,6 @@ static inline void tramp_free(void *tramp)
 {
execmem_free(tramp);
 }
-#else
-/* Trampolines can only be created if modules are supported */
-static inline void *alloc_tramp(unsigned long size)
-{
-   return NULL;
-}
-static inline void tramp_free(void *tramp) { }
-#endif
 
 /* Defined as markers to the end of the ftrace default trampolines */
 extern void ftrace_regs_caller_end(void);
-- 
2.43.0



[PATCH v5 11/15] arch: make execmem setup available regardless of CONFIG_MODULES

2024-04-22 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

execmem does not depend on modules, on the contrary modules use
execmem.

To make execmem available when CONFIG_MODULES=n, for instance for
kprobes, split execmem_info initialization out from
arch/*/kernel/module.c and compile it when CONFIG_EXECMEM=y.

Signed-off-by: Mike Rapoport (IBM) 
---
 arch/arm/kernel/module.c   |  43 --
 arch/arm/mm/init.c |  45 +++
 arch/arm64/kernel/module.c | 140 -
 arch/arm64/mm/init.c   | 140 +
 arch/loongarch/kernel/module.c |  19 -
 arch/loongarch/mm/init.c   |  21 +
 arch/mips/kernel/module.c  |  22 --
 arch/mips/mm/init.c|  23 ++
 arch/nios2/kernel/module.c |  20 -
 arch/nios2/mm/init.c   |  21 +
 arch/parisc/kernel/module.c|  20 -
 arch/parisc/mm/init.c  |  23 +-
 arch/powerpc/kernel/module.c   |  63 ---
 arch/powerpc/mm/mem.c  |  64 +++
 arch/riscv/kernel/module.c |  44 ---
 arch/riscv/mm/init.c   |  45 +++
 arch/s390/kernel/module.c  |  27 ---
 arch/s390/mm/init.c|  30 +++
 arch/sparc/kernel/module.c |  19 -
 arch/sparc/mm/Makefile |   2 +
 arch/sparc/mm/execmem.c|  21 +
 arch/x86/kernel/module.c   |  27 ---
 arch/x86/mm/init.c |  29 +++
 23 files changed, 463 insertions(+), 445 deletions(-)
 create mode 100644 arch/sparc/mm/execmem.c

diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c
index a98fdf6ff26c..677f218f7e84 100644
--- a/arch/arm/kernel/module.c
+++ b/arch/arm/kernel/module.c
@@ -12,57 +12,14 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
-#include 
-#include 
 
 #include 
 #include 
 #include 
 #include 
 
-#ifdef CONFIG_XIP_KERNEL
-/*
- * The XIP kernel text is mapped in the module area for modules and
- * some other stuff to work without any indirect relocations.
- * MODULES_VADDR is redefined here and not in asm/memory.h to avoid
- * recompiling the whole kernel when CONFIG_XIP_KERNEL is turned on/off.
- */
-#undef MODULES_VADDR
-#define MODULES_VADDR  (((unsigned long)_exiprom + ~PMD_MASK) & PMD_MASK)
-#endif
-
-#ifdef CONFIG_MMU
-static struct execmem_info execmem_info __ro_after_init;
-
-struct execmem_info __init *execmem_arch_setup(void)
-{
-   unsigned long fallback_start = 0, fallback_end = 0;
-
-   if (IS_ENABLED(CONFIG_ARM_MODULE_PLTS)) {
-   fallback_start = VMALLOC_START;
-   fallback_end = VMALLOC_END;
-   }
-
-   execmem_info = (struct execmem_info){
-   .ranges = {
-   [EXECMEM_DEFAULT] = {
-   .start  = MODULES_VADDR,
-   .end= MODULES_END,
-   .pgprot = PAGE_KERNEL_EXEC,
-   .alignment = 1,
-   .fallback_start = fallback_start,
-   .fallback_end   = fallback_end,
-   },
-   },
-   };
-
-   return _info;
-}
-#endif
-
 bool module_init_section(const char *name)
 {
return strstarts(name, ".init") ||
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index e8c6f4be0ce1..5345d218899a 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -22,6 +22,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -486,3 +487,47 @@ void free_initrd_mem(unsigned long start, unsigned long 
end)
free_reserved_area((void *)start, (void *)end, -1, "initrd");
 }
 #endif
+
+#ifdef CONFIG_EXECMEM
+
+#ifdef CONFIG_XIP_KERNEL
+/*
+ * The XIP kernel text is mapped in the module area for modules and
+ * some other stuff to work without any indirect relocations.
+ * MODULES_VADDR is redefined here and not in asm/memory.h to avoid
+ * recompiling the whole kernel when CONFIG_XIP_KERNEL is turned on/off.
+ */
+#undef MODULES_VADDR
+#define MODULES_VADDR  (((unsigned long)_exiprom + ~PMD_MASK) & PMD_MASK)
+#endif
+
+#ifdef CONFIG_MMU
+static struct execmem_info execmem_info __ro_after_init;
+
+struct execmem_info __init *execmem_arch_setup(void)
+{
+   unsigned long fallback_start = 0, fallback_end = 0;
+
+   if (IS_ENABLED(CONFIG_ARM_MODULE_PLTS)) {
+   fallback_start = VMALLOC_START;
+   fallback_end = VMALLOC_END;
+   }
+
+   execmem_info = (struct execmem_info){
+   .ranges = {
+   [EXECMEM_DEFAULT] = {
+   .start  = MODULES_VADDR,
+   .end= MODULES_END,
+   .pgprot = PAGE_KERNEL_EXEC,
+   .alignment = 1,
+   .fallback_start = fallback_start,
+   

[PATCH v5 10/15] powerpc: extend execmem_params for kprobes allocations

2024-04-22 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

powerpc overrides kprobes::alloc_insn_page() to remove writable
permissions when STRICT_MODULE_RWX is on.

Add definition of EXECMEM_KPROBES to execmem_params to allow using the
generic kprobes::alloc_insn_page() with the desired permissions.

As powerpc uses breakpoint instructions to inject kprobes, it does not
need to constrain kprobe allocations to the modules area and can use the
entire vmalloc address space.

Signed-off-by: Mike Rapoport (IBM) 
---
 arch/powerpc/kernel/kprobes.c | 20 
 arch/powerpc/kernel/module.c  |  7 +++
 2 files changed, 7 insertions(+), 20 deletions(-)

diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index 9fcd01bb2ce6..14c5ddec3056 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -126,26 +126,6 @@ kprobe_opcode_t *arch_adjust_kprobe_addr(unsigned long 
addr, unsigned long offse
return (kprobe_opcode_t *)(addr + offset);
 }
 
-void *alloc_insn_page(void)
-{
-   void *page;
-
-   page = execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE);
-   if (!page)
-   return NULL;
-
-   if (strict_module_rwx_enabled()) {
-   int err = set_memory_rox((unsigned long)page, 1);
-
-   if (err)
-   goto error;
-   }
-   return page;
-error:
-   execmem_free(page);
-   return NULL;
-}
-
 int arch_prepare_kprobe(struct kprobe *p)
 {
int ret = 0;
diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c
index ac80559015a3..2a23cf7e141b 100644
--- a/arch/powerpc/kernel/module.c
+++ b/arch/powerpc/kernel/module.c
@@ -94,6 +94,7 @@ static struct execmem_info execmem_info __ro_after_init;
 
 struct execmem_info __init *execmem_arch_setup(void)
 {
+   pgprot_t kprobes_prot = strict_module_rwx_enabled() ? PAGE_KERNEL_ROX : 
PAGE_KERNEL_EXEC;
pgprot_t prot = strict_module_rwx_enabled() ? PAGE_KERNEL : 
PAGE_KERNEL_EXEC;
unsigned long fallback_start = 0, fallback_end = 0;
unsigned long start, end;
@@ -132,6 +133,12 @@ struct execmem_info __init *execmem_arch_setup(void)
.fallback_start = fallback_start,
.fallback_end   = fallback_end,
},
+   [EXECMEM_KPROBES] = {
+   .start  = VMALLOC_START,
+   .end= VMALLOC_END,
+   .pgprot = kprobes_prot,
+   .alignment = 1,
+   },
[EXECMEM_MODULE_DATA] = {
.start  = VMALLOC_START,
.end= VMALLOC_END,
-- 
2.43.0



[PATCH v5 09/15] riscv: extend execmem_params for generated code allocations

2024-04-22 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

The memory allocations for kprobes and BPF on RISC-V are not placed in
the modules area and these custom allocations are implemented with
overrides of alloc_insn_page() and  bpf_jit_alloc_exec().

Slightly reorder execmem_params initialization to support both 32 and 64
bit variants, define EXECMEM_KPROBES and EXECMEM_BPF ranges in
riscv::execmem_params and drop overrides of alloc_insn_page() and
bpf_jit_alloc_exec().

Signed-off-by: Mike Rapoport (IBM) 
Reviewed-by: Alexandre Ghiti 
---
 arch/riscv/kernel/module.c | 28 +---
 arch/riscv/kernel/probes/kprobes.c | 10 --
 arch/riscv/net/bpf_jit_core.c  | 13 -
 3 files changed, 25 insertions(+), 26 deletions(-)

diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c
index 182904127ba0..2ecbacbc9993 100644
--- a/arch/riscv/kernel/module.c
+++ b/arch/riscv/kernel/module.c
@@ -906,19 +906,41 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char 
*strtab,
return 0;
 }
 
-#if defined(CONFIG_MMU) && defined(CONFIG_64BIT)
+#ifdef CONFIG_MMU
 static struct execmem_info execmem_info __ro_after_init;
 
 struct execmem_info __init *execmem_arch_setup(void)
 {
+   unsigned long start, end;
+
+   if (IS_ENABLED(CONFIG_64BIT)) {
+   start = MODULES_VADDR;
+   end = MODULES_END;
+   } else {
+   start = VMALLOC_START;
+   end = VMALLOC_END;
+   }
+
execmem_info = (struct execmem_info){
.ranges = {
[EXECMEM_DEFAULT] = {
-   .start  = MODULES_VADDR,
-   .end= MODULES_END,
+   .start  = start,
+   .end= end,
.pgprot = PAGE_KERNEL,
.alignment = 1,
},
+   [EXECMEM_KPROBES] = {
+   .start  = VMALLOC_START,
+   .end= VMALLOC_END,
+   .pgprot = PAGE_KERNEL_READ_EXEC,
+   .alignment = 1,
+   },
+   [EXECMEM_BPF] = {
+   .start  = BPF_JIT_REGION_START,
+   .end= BPF_JIT_REGION_END,
+   .pgprot = PAGE_KERNEL,
+   .alignment = PAGE_SIZE,
+   },
},
};
 
diff --git a/arch/riscv/kernel/probes/kprobes.c 
b/arch/riscv/kernel/probes/kprobes.c
index 2f08c14a933d..e64f2f3064eb 100644
--- a/arch/riscv/kernel/probes/kprobes.c
+++ b/arch/riscv/kernel/probes/kprobes.c
@@ -104,16 +104,6 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
return 0;
 }
 
-#ifdef CONFIG_MMU
-void *alloc_insn_page(void)
-{
-   return  __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, VMALLOC_END,
-GFP_KERNEL, PAGE_KERNEL_READ_EXEC,
-VM_FLUSH_RESET_PERMS, NUMA_NO_NODE,
-__builtin_return_address(0));
-}
-#endif
-
 /* install breakpoint in text */
 void __kprobes arch_arm_kprobe(struct kprobe *p)
 {
diff --git a/arch/riscv/net/bpf_jit_core.c b/arch/riscv/net/bpf_jit_core.c
index 6b3acac30c06..e238fdbd5dbc 100644
--- a/arch/riscv/net/bpf_jit_core.c
+++ b/arch/riscv/net/bpf_jit_core.c
@@ -219,19 +219,6 @@ u64 bpf_jit_alloc_exec_limit(void)
return BPF_JIT_REGION_SIZE;
 }
 
-void *bpf_jit_alloc_exec(unsigned long size)
-{
-   return __vmalloc_node_range(size, PAGE_SIZE, BPF_JIT_REGION_START,
-   BPF_JIT_REGION_END, GFP_KERNEL,
-   PAGE_KERNEL, 0, NUMA_NO_NODE,
-   __builtin_return_address(0));
-}
-
-void bpf_jit_free_exec(void *addr)
-{
-   return vfree(addr);
-}
-
 void *bpf_arch_text_copy(void *dst, void *src, size_t len)
 {
int ret;
-- 
2.43.0



[PATCH v5 08/15] mm/execmem, arch: convert remaining overrides of module_alloc to execmem

2024-04-22 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

Extend execmem parameters to accommodate more complex overrides of
module_alloc() by architectures.

This includes specification of a fallback range required by arm, arm64
and powerpc, EXECMEM_MODULE_DATA type required by powerpc, support for
allocation of KASAN shadow required by s390 and x86 and support for
early initialization of execmem required by x86.

The core implementation of execmem_alloc() takes care of suppressing
warnings when the initial allocation fails but there is a fallback range
defined.

Signed-off-by: Mike Rapoport (IBM) 
Acked-by: Will Deacon 
---
 arch/Kconfig   |  6 +++
 arch/arm/kernel/module.c   | 41 ++---
 arch/arm64/kernel/module.c | 67 ++--
 arch/arm64/kernel/probes/kprobes.c |  7 ---
 arch/arm64/net/bpf_jit_comp.c  | 11 -
 arch/powerpc/kernel/module.c   | 60 -
 arch/s390/kernel/module.c  | 54 ++-
 arch/x86/Kconfig   |  1 +
 arch/x86/kernel/module.c   | 70 ++
 include/linux/execmem.h| 34 +++
 include/linux/moduleloader.h   | 12 -
 kernel/module/main.c   | 26 +++
 mm/execmem.c   | 70 +-
 mm/mm_init.c   |  2 +
 14 files changed, 259 insertions(+), 202 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index 65afb1de48b3..7006f71f0110 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -960,6 +960,12 @@ config ARCH_WANTS_MODULES_DATA_IN_VMALLOC
  For architectures like powerpc/32 which have constraints on module
  allocation and need to allocate module data outside of module area.
 
+config ARCH_WANTS_EXECMEM_EARLY
+   bool
+   help
+ For architectures that might allocate executable memory early on
+ boot, for instance ftrace on x86.
+
 config HAVE_IRQ_EXIT_ON_IRQ_STACK
bool
help
diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c
index e74d84f58b77..a98fdf6ff26c 100644
--- a/arch/arm/kernel/module.c
+++ b/arch/arm/kernel/module.c
@@ -16,6 +16,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -34,23 +35,31 @@
 #endif
 
 #ifdef CONFIG_MMU
-void *module_alloc(unsigned long size)
+static struct execmem_info execmem_info __ro_after_init;
+
+struct execmem_info __init *execmem_arch_setup(void)
 {
-   gfp_t gfp_mask = GFP_KERNEL;
-   void *p;
-
-   /* Silence the initial allocation */
-   if (IS_ENABLED(CONFIG_ARM_MODULE_PLTS))
-   gfp_mask |= __GFP_NOWARN;
-
-   p = __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
-   gfp_mask, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
-   __builtin_return_address(0));
-   if (!IS_ENABLED(CONFIG_ARM_MODULE_PLTS) || p)
-   return p;
-   return __vmalloc_node_range(size, 1,  VMALLOC_START, VMALLOC_END,
-   GFP_KERNEL, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
-   __builtin_return_address(0));
+   unsigned long fallback_start = 0, fallback_end = 0;
+
+   if (IS_ENABLED(CONFIG_ARM_MODULE_PLTS)) {
+   fallback_start = VMALLOC_START;
+   fallback_end = VMALLOC_END;
+   }
+
+   execmem_info = (struct execmem_info){
+   .ranges = {
+   [EXECMEM_DEFAULT] = {
+   .start  = MODULES_VADDR,
+   .end= MODULES_END,
+   .pgprot = PAGE_KERNEL_EXEC,
+   .alignment = 1,
+   .fallback_start = fallback_start,
+   .fallback_end   = fallback_end,
+   },
+   },
+   };
+
+   return _info;
 }
 #endif
 
diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index e92da4da1b2a..a52240ea084b 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -20,6 +20,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -108,41 +109,59 @@ static int __init module_init_limits(void)
 
return 0;
 }
-subsys_initcall(module_init_limits);
 
-void *module_alloc(unsigned long size)
+static struct execmem_info execmem_info __ro_after_init;
+
+struct execmem_info __init *execmem_arch_setup(void)
 {
-   void *p = NULL;
+   unsigned long fallback_start = 0, fallback_end = 0;
+   unsigned long start = 0, end = 0;
+
+   module_init_limits();
 
/*
 * Where possible, prefer to allocate within direct branch range of the
 * kernel such that no PLTs are necessary.
 */
if (module_direct_base) {
-   p = __vmalloc_node_range(size, MODULE_ALIGN,
-module_d

[PATCH v5 07/15] mm/execmem, arch: convert simple overrides of module_alloc to execmem

2024-04-22 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

Several architectures override module_alloc() only to define address
range for code allocations different than VMALLOC address space.

Provide a generic implementation in execmem that uses the parameters for
address space ranges, required alignment and page protections provided
by architectures.

The architectures must fill execmem_info structure and implement
execmem_arch_setup() that returns a pointer to that structure. This way the
execmem initialization won't be called from every architecture, but rather
from a central place, namely a core_initcall() in execmem.

The execmem provides execmem_alloc() API that wraps __vmalloc_node_range()
with the parameters defined by the architectures.  If an architecture does
not implement execmem_arch_setup(), execmem_alloc() will fall back to
module_alloc().

Signed-off-by: Mike Rapoport (IBM) 
---
 arch/loongarch/kernel/module.c | 19 +++--
 arch/mips/kernel/module.c  | 20 --
 arch/nios2/kernel/module.c | 21 +++---
 arch/parisc/kernel/module.c| 24 +++
 arch/riscv/kernel/module.c | 24 +++
 arch/sparc/kernel/module.c | 20 --
 include/linux/execmem.h| 41 +++
 mm/execmem.c   | 73 --
 8 files changed, 208 insertions(+), 34 deletions(-)

diff --git a/arch/loongarch/kernel/module.c b/arch/loongarch/kernel/module.c
index c7d0338d12c1..ca6dd7ea1610 100644
--- a/arch/loongarch/kernel/module.c
+++ b/arch/loongarch/kernel/module.c
@@ -18,6 +18,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -490,10 +491,22 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char 
*strtab,
return 0;
 }
 
-void *module_alloc(unsigned long size)
+static struct execmem_info execmem_info __ro_after_init;
+
+struct execmem_info __init *execmem_arch_setup(void)
 {
-   return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
-   GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE, 
__builtin_return_address(0));
+   execmem_info = (struct execmem_info){
+   .ranges = {
+   [EXECMEM_DEFAULT] = {
+   .start  = MODULES_VADDR,
+   .end= MODULES_END,
+   .pgprot = PAGE_KERNEL,
+   .alignment = 1,
+   },
+   },
+   };
+
+   return _info;
 }
 
 static void module_init_ftrace_plt(const Elf_Ehdr *hdr,
diff --git a/arch/mips/kernel/module.c b/arch/mips/kernel/module.c
index 9a6c96014904..59225a3cf918 100644
--- a/arch/mips/kernel/module.c
+++ b/arch/mips/kernel/module.c
@@ -20,6 +20,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 struct mips_hi16 {
@@ -32,11 +33,22 @@ static LIST_HEAD(dbe_list);
 static DEFINE_SPINLOCK(dbe_lock);
 
 #ifdef MODULES_VADDR
-void *module_alloc(unsigned long size)
+static struct execmem_info execmem_info __ro_after_init;
+
+struct execmem_info __init *execmem_arch_setup(void)
 {
-   return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
-   GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE,
-   __builtin_return_address(0));
+   execmem_info = (struct execmem_info){
+   .ranges = {
+   [EXECMEM_DEFAULT] = {
+   .start  = MODULES_VADDR,
+   .end= MODULES_END,
+   .pgprot = PAGE_KERNEL,
+   .alignment = 1,
+   },
+   },
+   };
+
+   return _info;
 }
 #endif
 
diff --git a/arch/nios2/kernel/module.c b/arch/nios2/kernel/module.c
index 9c97b7513853..0d1ee86631fc 100644
--- a/arch/nios2/kernel/module.c
+++ b/arch/nios2/kernel/module.c
@@ -18,15 +18,26 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 
-void *module_alloc(unsigned long size)
+static struct execmem_info execmem_info __ro_after_init;
+
+struct execmem_info __init *execmem_arch_setup(void)
 {
-   return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
-   GFP_KERNEL, PAGE_KERNEL_EXEC,
-   VM_FLUSH_RESET_PERMS, NUMA_NO_NODE,
-   __builtin_return_address(0));
+   execmem_info = (struct execmem_info){
+   .ranges = {
+   [EXECMEM_DEFAULT] = {
+   .start  = MODULES_VADDR,
+   .end= MODULES_END,
+   .pgprot = PAGE_KERNEL_EXEC,
+   .alignment = 1,
+   },
+   },
+   };
+
+   return _info;
 }
 
 int apply_relocate_add(Elf32_Shdr *sechdrs, const char *strtab,
diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c
index d2

[PATCH v5 06/15] mm: introduce execmem_alloc() and execmem_free()

2024-04-22 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

module_alloc() is used everywhere as a means to allocate memory for code.

Besides being semantically wrong, this unnecessarily ties all subsystems
that need to allocate code, such as ftrace, kprobes and BPF to modules and
puts the burden of code allocation to the modules code.

Several architectures override module_alloc() because of various
constraints where the executable memory can be located and this causes
additional obstacles for improvements of code allocation.

Start splitting code allocation from modules by introducing execmem_alloc()
and execmem_free() APIs.

Initially, execmem_alloc() is a wrapper for module_alloc() and
execmem_free() is a replacement of module_memfree() to allow updating all
call sites to use the new APIs.

Since architectures define different restrictions on placement,
permissions, alignment and other parameters for memory that can be used by
different subsystems that allocate executable memory, execmem_alloc() takes
a type argument, that will be used to identify the calling subsystem and to
allow architectures define parameters for ranges suitable for that
subsystem.

No functional changes.

Signed-off-by: Mike Rapoport (IBM) 
Acked-by: Masami Hiramatsu (Google) 
---
 arch/powerpc/kernel/kprobes.c|  6 ++--
 arch/s390/kernel/ftrace.c|  4 +--
 arch/s390/kernel/kprobes.c   |  4 +--
 arch/s390/kernel/module.c|  5 +--
 arch/sparc/net/bpf_jit_comp_32.c |  8 ++---
 arch/x86/kernel/ftrace.c |  6 ++--
 arch/x86/kernel/kprobes/core.c   |  4 +--
 include/linux/execmem.h  | 57 
 include/linux/moduleloader.h |  3 --
 kernel/bpf/core.c|  6 ++--
 kernel/kprobes.c |  8 ++---
 kernel/module/Kconfig|  1 +
 kernel/module/main.c | 25 +-
 mm/Kconfig   |  3 ++
 mm/Makefile  |  1 +
 mm/execmem.c | 32 ++
 16 files changed, 128 insertions(+), 45 deletions(-)
 create mode 100644 include/linux/execmem.h
 create mode 100644 mm/execmem.c

diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index bbca90a5e2ec..9fcd01bb2ce6 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -19,8 +19,8 @@
 #include 
 #include 
 #include 
-#include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -130,7 +130,7 @@ void *alloc_insn_page(void)
 {
void *page;
 
-   page = module_alloc(PAGE_SIZE);
+   page = execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE);
if (!page)
return NULL;
 
@@ -142,7 +142,7 @@ void *alloc_insn_page(void)
}
return page;
 error:
-   module_memfree(page);
+   execmem_free(page);
return NULL;
 }
 
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index c46381ea04ec..798249ef5646 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -7,13 +7,13 @@
  *   Author(s): Martin Schwidefsky 
  */
 
-#include 
 #include 
 #include 
 #include 
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -220,7 +220,7 @@ static int __init ftrace_plt_init(void)
 {
const char *start, *end;
 
-   ftrace_plt = module_alloc(PAGE_SIZE);
+   ftrace_plt = execmem_alloc(EXECMEM_FTRACE, PAGE_SIZE);
if (!ftrace_plt)
panic("cannot allocate ftrace plt\n");
 
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index f0cf20d4b3c5..3c1b1be744de 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -9,7 +9,6 @@
 
 #define pr_fmt(fmt) "kprobes: " fmt
 
-#include 
 #include 
 #include 
 #include 
@@ -21,6 +20,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -38,7 +38,7 @@ void *alloc_insn_page(void)
 {
void *page;
 
-   page = module_alloc(PAGE_SIZE);
+   page = execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE);
if (!page)
return NULL;
set_memory_rox((unsigned long)page, 1);
diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c
index 42215f9404af..ac97a905e8cd 100644
--- a/arch/s390/kernel/module.c
+++ b/arch/s390/kernel/module.c
@@ -21,6 +21,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -76,7 +77,7 @@ void *module_alloc(unsigned long size)
 #ifdef CONFIG_FUNCTION_TRACER
 void module_arch_cleanup(struct module *mod)
 {
-   module_memfree(mod->arch.trampolines_start);
+   execmem_free(mod->arch.trampolines_start);
 }
 #endif
 
@@ -510,7 +511,7 @@ static int module_alloc_ftrace_hotpatch_trampolines(struct 
module *me,
 
size = FTRACE_HOTPATCH_TRAMPOLINES_SIZE(s->sh_size);
numpages = DIV_ROUND_UP(size, PAGE_SIZE);
-   start = module_alloc(numpages * PAGE_SIZE);
+   start = execmem_alloc(EXECMEM_FTRACE, numpages * PAGE_SIZE);
if 

[PATCH v5 05/15] module: make module_memory_{alloc,free} more self-contained

2024-04-22 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

Move the logic related to the memory allocation and freeing into
module_memory_alloc() and module_memory_free().

Signed-off-by: Mike Rapoport (IBM) 
---
 kernel/module/main.c | 64 +++-
 1 file changed, 39 insertions(+), 25 deletions(-)

diff --git a/kernel/module/main.c b/kernel/module/main.c
index e1e8a7a9d6c1..5b82b069e0d3 100644
--- a/kernel/module/main.c
+++ b/kernel/module/main.c
@@ -1203,15 +1203,44 @@ static bool mod_mem_use_vmalloc(enum mod_mem_type type)
mod_mem_type_is_core_data(type);
 }
 
-static void *module_memory_alloc(unsigned int size, enum mod_mem_type type)
+static int module_memory_alloc(struct module *mod, enum mod_mem_type type)
 {
+   unsigned int size = PAGE_ALIGN(mod->mem[type].size);
+   void *ptr;
+
+   mod->mem[type].size = size;
+
if (mod_mem_use_vmalloc(type))
-   return vzalloc(size);
-   return module_alloc(size);
+   ptr = vmalloc(size);
+   else
+   ptr = module_alloc(size);
+
+   if (!ptr)
+   return -ENOMEM;
+
+   /*
+* The pointer to these blocks of memory are stored on the module
+* structure and we keep that around so long as the module is
+* around. We only free that memory when we unload the module.
+* Just mark them as not being a leak then. The .init* ELF
+* sections *do* get freed after boot so we *could* treat them
+* slightly differently with kmemleak_ignore() and only grey
+* them out as they work as typical memory allocations which
+* *do* eventually get freed, but let's just keep things simple
+* and avoid *any* false positives.
+*/
+   kmemleak_not_leak(ptr);
+
+   memset(ptr, 0, size);
+   mod->mem[type].base = ptr;
+
+   return 0;
 }
 
-static void module_memory_free(void *ptr, enum mod_mem_type type)
+static void module_memory_free(struct module *mod, enum mod_mem_type type)
 {
+   void *ptr = mod->mem[type].base;
+
if (mod_mem_use_vmalloc(type))
vfree(ptr);
else
@@ -1229,12 +1258,12 @@ static void free_mod_mem(struct module *mod)
/* Free lock-classes; relies on the preceding sync_rcu(). */
lockdep_free_key_range(mod_mem->base, mod_mem->size);
if (mod_mem->size)
-   module_memory_free(mod_mem->base, type);
+   module_memory_free(mod, type);
}
 
/* MOD_DATA hosts mod, so free it at last */
lockdep_free_key_range(mod->mem[MOD_DATA].base, 
mod->mem[MOD_DATA].size);
-   module_memory_free(mod->mem[MOD_DATA].base, MOD_DATA);
+   module_memory_free(mod, MOD_DATA);
 }
 
 /* Free a module, remove from lists, etc. */
@@ -2225,7 +2254,6 @@ static int find_module_sections(struct module *mod, 
struct load_info *info)
 static int move_module(struct module *mod, struct load_info *info)
 {
int i;
-   void *ptr;
enum mod_mem_type t = 0;
int ret = -ENOMEM;
 
@@ -2234,26 +2262,12 @@ static int move_module(struct module *mod, struct 
load_info *info)
mod->mem[type].base = NULL;
continue;
}
-   mod->mem[type].size = PAGE_ALIGN(mod->mem[type].size);
-   ptr = module_memory_alloc(mod->mem[type].size, type);
-   /*
- * The pointer to these blocks of memory are stored on the 
module
- * structure and we keep that around so long as the module is
- * around. We only free that memory when we unload the module.
- * Just mark them as not being a leak then. The .init* ELF
- * sections *do* get freed after boot so we *could* treat them
- * slightly differently with kmemleak_ignore() and only grey
- * them out as they work as typical memory allocations which
- * *do* eventually get freed, but let's just keep things simple
- * and avoid *any* false positives.
-*/
-   kmemleak_not_leak(ptr);
-   if (!ptr) {
+
+   ret = module_memory_alloc(mod, type);
+   if (ret) {
t = type;
goto out_enomem;
}
-   memset(ptr, 0, mod->mem[type].size);
-   mod->mem[type].base = ptr;
}
 
/* Transfer each section which specifies SHF_ALLOC */
@@ -2296,7 +2310,7 @@ static int move_module(struct module *mod, struct 
load_info *info)
return 0;
 out_enomem:
for (t--; t >= 0; t--)
-   module_memory_free(mod->mem[t].base, t);
+   module_memory_free(mod, t);
return ret;
 }
 
-- 
2.43.0



[PATCH v5 04/15] sparc: simplify module_alloc()

2024-04-22 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

Define MODULES_VADDR and MODULES_END as VMALLOC_START and VMALLOC_END
for 32-bit and reduce module_alloc() to

__vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, ...)

as with the new defines the allocations become identical for both 32
and 64 bits.

While on it, drop unused include of 

Suggested-by: Sam Ravnborg 
Signed-off-by: Mike Rapoport (IBM) 
---
 arch/sparc/include/asm/pgtable_32.h |  2 ++
 arch/sparc/kernel/module.c  | 25 +
 2 files changed, 3 insertions(+), 24 deletions(-)

diff --git a/arch/sparc/include/asm/pgtable_32.h 
b/arch/sparc/include/asm/pgtable_32.h
index 9e85d57ac3f2..62bcafe38b1f 100644
--- a/arch/sparc/include/asm/pgtable_32.h
+++ b/arch/sparc/include/asm/pgtable_32.h
@@ -432,6 +432,8 @@ static inline int io_remap_pfn_range(struct vm_area_struct 
*vma,
 
 #define VMALLOC_START   _AC(0xfe60,UL)
 #define VMALLOC_END _AC(0xffc0,UL)
+#define MODULES_VADDR   VMALLOC_START
+#define MODULES_END VMALLOC_END
 
 /* We provide our own get_unmapped_area to cope with VA holes for userland */
 #define HAVE_ARCH_UNMAPPED_AREA
diff --git a/arch/sparc/kernel/module.c b/arch/sparc/kernel/module.c
index 66c45a2764bc..d37adb2a0b54 100644
--- a/arch/sparc/kernel/module.c
+++ b/arch/sparc/kernel/module.c
@@ -21,35 +21,12 @@
 
 #include "entry.h"
 
-#ifdef CONFIG_SPARC64
-
-#include 
-
-static void *module_map(unsigned long size)
+void *module_alloc(unsigned long size)
 {
-   if (PAGE_ALIGN(size) > MODULES_LEN)
-   return NULL;
return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE,
__builtin_return_address(0));
 }
-#else
-static void *module_map(unsigned long size)
-{
-   return vmalloc(size);
-}
-#endif /* CONFIG_SPARC64 */
-
-void *module_alloc(unsigned long size)
-{
-   void *ret;
-
-   ret = module_map(size);
-   if (ret)
-   memset(ret, 0, size);
-
-   return ret;
-}
 
 /* Make generic code ignore STT_REGISTER dummy undefined symbols.  */
 int module_frob_arch_sections(Elf_Ehdr *hdr,
-- 
2.43.0



[PATCH v5 03/15] nios2: define virtual address space for modules

2024-04-22 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

nios2 uses kmalloc() to implement module_alloc() because CALL26/PCREL26
cannot reach all of vmalloc address space.

Define module space as 32MiB below the kernel base and switch nios2 to
use vmalloc for module allocations.

Suggested-by: Thomas Gleixner 
Acked-by: Dinh Nguyen 
Acked-by: Song Liu 
Signed-off-by: Mike Rapoport (IBM) 
---
 arch/nios2/include/asm/pgtable.h |  5 -
 arch/nios2/kernel/module.c   | 19 ---
 2 files changed, 8 insertions(+), 16 deletions(-)

diff --git a/arch/nios2/include/asm/pgtable.h b/arch/nios2/include/asm/pgtable.h
index d052dfcbe8d3..eab87c6beacb 100644
--- a/arch/nios2/include/asm/pgtable.h
+++ b/arch/nios2/include/asm/pgtable.h
@@ -25,7 +25,10 @@
 #include 
 
 #define VMALLOC_START  CONFIG_NIOS2_KERNEL_MMU_REGION_BASE
-#define VMALLOC_END(CONFIG_NIOS2_KERNEL_REGION_BASE - 1)
+#define VMALLOC_END(CONFIG_NIOS2_KERNEL_REGION_BASE - SZ_32M - 1)
+
+#define MODULES_VADDR  (CONFIG_NIOS2_KERNEL_REGION_BASE - SZ_32M)
+#define MODULES_END(CONFIG_NIOS2_KERNEL_REGION_BASE - 1)
 
 struct mm_struct;
 
diff --git a/arch/nios2/kernel/module.c b/arch/nios2/kernel/module.c
index 76e0a42d6e36..9c97b7513853 100644
--- a/arch/nios2/kernel/module.c
+++ b/arch/nios2/kernel/module.c
@@ -21,23 +21,12 @@
 
 #include 
 
-/*
- * Modules should NOT be allocated with kmalloc for (obvious) reasons.
- * But we do it for now to avoid relocation issues. CALL26/PCREL26 cannot reach
- * from 0x8000 (vmalloc area) to 0xc (kernel) (kmalloc returns
- * addresses in 0xc000)
- */
 void *module_alloc(unsigned long size)
 {
-   if (size == 0)
-   return NULL;
-   return kmalloc(size, GFP_KERNEL);
-}
-
-/* Free memory returned from module_alloc */
-void module_memfree(void *module_region)
-{
-   kfree(module_region);
+   return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
+   GFP_KERNEL, PAGE_KERNEL_EXEC,
+   VM_FLUSH_RESET_PERMS, NUMA_NO_NODE,
+   __builtin_return_address(0));
 }
 
 int apply_relocate_add(Elf32_Shdr *sechdrs, const char *strtab,
-- 
2.43.0



[PATCH v5 02/15] mips: module: rename MODULE_START to MODULES_VADDR

2024-04-22 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

and MODULE_END to MODULES_END to match other architectures that define
custom address space for modules.

Signed-off-by: Mike Rapoport (IBM) 
---
 arch/mips/include/asm/pgtable-64.h | 4 ++--
 arch/mips/kernel/module.c  | 4 ++--
 arch/mips/mm/fault.c   | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/mips/include/asm/pgtable-64.h 
b/arch/mips/include/asm/pgtable-64.h
index 20ca48c1b606..c0109aff223b 100644
--- a/arch/mips/include/asm/pgtable-64.h
+++ b/arch/mips/include/asm/pgtable-64.h
@@ -147,8 +147,8 @@
 #if defined(CONFIG_MODULES) && defined(KBUILD_64BIT_SYM32) && \
VMALLOC_START != CKSSEG
 /* Load modules into 32bit-compatible segment. */
-#define MODULE_START   CKSSEG
-#define MODULE_END (FIXADDR_START-2*PAGE_SIZE)
+#define MODULES_VADDR  CKSSEG
+#define MODULES_END(FIXADDR_START-2*PAGE_SIZE)
 #endif
 
 #define pte_ERROR(e) \
diff --git a/arch/mips/kernel/module.c b/arch/mips/kernel/module.c
index 7b2fbaa9cac5..9a6c96014904 100644
--- a/arch/mips/kernel/module.c
+++ b/arch/mips/kernel/module.c
@@ -31,10 +31,10 @@ struct mips_hi16 {
 static LIST_HEAD(dbe_list);
 static DEFINE_SPINLOCK(dbe_lock);
 
-#ifdef MODULE_START
+#ifdef MODULES_VADDR
 void *module_alloc(unsigned long size)
 {
-   return __vmalloc_node_range(size, 1, MODULE_START, MODULE_END,
+   return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE,
__builtin_return_address(0));
 }
diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c
index aaa9a242ebba..37fedeaca2e9 100644
--- a/arch/mips/mm/fault.c
+++ b/arch/mips/mm/fault.c
@@ -83,8 +83,8 @@ static void __do_page_fault(struct pt_regs *regs, unsigned 
long write,
 
if (unlikely(address >= VMALLOC_START && address <= VMALLOC_END))
goto VMALLOC_FAULT_TARGET;
-#ifdef MODULE_START
-   if (unlikely(address >= MODULE_START && address < MODULE_END))
+#ifdef MODULES_VADDR
+   if (unlikely(address >= MODULES_VADDR && address < MODULES_END))
goto VMALLOC_FAULT_TARGET;
 #endif
 
-- 
2.43.0



[PATCH v5 01/15] arm64: module: remove unneeded call to kasan_alloc_module_shadow()

2024-04-22 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

Since commit f6f37d9320a1 ("arm64: select KASAN_VMALLOC for SW/HW_TAGS
modes") KASAN_VMALLOC is always enabled when KASAN is on. This means
that allocations in module_alloc() will be tracked by KASAN protection
for vmalloc() and that kasan_alloc_module_shadow() will be always an
empty inline and there is no point in calling it.

Drop meaningless call to kasan_alloc_module_shadow() from
module_alloc().

Signed-off-by: Mike Rapoport (IBM) 
---
 arch/arm64/kernel/module.c | 5 -
 1 file changed, 5 deletions(-)

diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index 47e0be610bb6..e92da4da1b2a 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -141,11 +141,6 @@ void *module_alloc(unsigned long size)
__func__);
}
 
-   if (p && (kasan_alloc_module_shadow(p, size, GFP_KERNEL) < 0)) {
-   vfree(p);
-   return NULL;
-   }
-
/* Memory is intended to be executable, reset the pointer tag. */
return kasan_reset_tag(p);
 }
-- 
2.43.0



[PATCH v5 00/15] mm: jit/text allocator

2024-04-22 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

(something went wrong with the previous posting, sorry for the noise)

Hi,

Since v3 I looked into making execmem more of a utility toolbox, as we
discussed at LPC with Mark Rutland, but it was getting hairier than
having a struct describing architecture constraints and a type identifying
the consumer of execmem.

And I do think that having the description of architecture constraints for
allocations of executable memory in a single place is better than having it
spread all over the place.

The patches available via git:
https://git.kernel.org/pub/scm/linux/kernel/git/rppt/linux.git/log/?h=execmem/v5

v5 changes:
* rebase on v6.9-rc4 to avoid a conflict in kprobes
* add copyrights to mm/execmem.c (Luis)
* fix spelling (Ingo)
* define MODULES_VADDR for sparc (Sam)
* consistently initialize struct execmem_info (Peter)
* reduce #ifdefs in function bodies in kprobes (Masami) 

v4: https://lore.kernel.org/all/20240411160051.2093261-1-r...@kernel.org
* rebase on v6.9-rc2
* rename execmem_params to execmem_info and execmem_arch_params() to
  execmem_arch_setup()
* use single execmem_alloc() API instead of execmem_{text,data}_alloc() (Song)
* avoid extra copy of execmem parameters (Rick)
* run execmem_init() as core_initcall() except for the architectures that
  may allocate text really early (currently only x86) (Will)
* add acks for some of arm64 and riscv changes, thanks Will and Alexandre
* new commits:
  - drop call to kasan_alloc_module_shadow() on arm64 because it's not
needed anymore
  - rename MODULE_START to MODULES_VADDR on MIPS
  - use CONFIG_EXECMEM instead of CONFIG_MODULES on powerpc as per Christophe:
https://lore.kernel.org/all/79062fa3-3402-47b3-8920-9231ad05e...@csgroup.eu/

v3: https://lore.kernel.org/all/20230918072955.2507221-1-r...@kernel.org
* add type parameter to execmem allocation APIs
* remove BPF dependency on modules

v2: https://lore.kernel.org/all/20230616085038.4121892-1-r...@kernel.org
* Separate "module" and "others" allocations with execmem_text_alloc()
and jit_text_alloc()
* Drop ROX entailment on x86
* Add ack for nios2 changes, thanks Dinh Nguyen

v1: https://lore.kernel.org/all/20230601101257.530867-1-r...@kernel.org

= Cover letter from v1 (slightly updated) =

module_alloc() is used everywhere as a means to allocate memory for code.

Besides being semantically wrong, this unnecessarily ties all subsystems
that need to allocate code, such as ftrace, kprobes and BPF to modules and
puts the burden of code allocation to the modules code.

Several architectures override module_alloc() because of various
constraints where the executable memory can be located and this causes
additional obstacles for improvements of code allocation.

A centralized infrastructure for code allocation allows allocations of
executable memory as ROX, and future optimizations such as caching large
pages for better iTLB performance and providing sub-page allocations for
users that only need small jit code snippets.

Rick Edgecombe proposed perm_alloc extension to vmalloc [1] and Song Liu
proposed execmem_alloc [2], but both these approaches were targeting BPF
allocations and lacked the ground work to abstract executable allocations
and split them from the modules core.

Thomas Gleixner suggested to express module allocation restrictions and
requirements as struct mod_alloc_type_params [3] that would define ranges,
protections and other parameters for different types of allocations used by
modules and following that suggestion Song separated allocations of
different types in modules (commit ac3b43283923 ("module: replace
module_layout with module_memory")) and posted "Type aware module
allocator" set [4].

I liked the idea of parametrising code allocation requirements as a
structure, but I believe the original proposal and Song's module allocator
was too module centric, so I came up with these patches.

This set splits code allocation from modules by introducing execmem_alloc()
and execmem_free() APIs, replaces call sites of module_alloc() and
module_memfree() with the new APIs and implements core text and related
allocations in a central place.

Instead of architecture specific overrides for module_alloc(), the
architectures that require non-default behaviour for text allocation must
fill execmem_info structure and implement execmem_arch_setup() that returns
a pointer to that structure. If an architecture does not implement
execmem_arch_setup(), the defaults compatible with the current
modules::module_alloc() are used.

Since architectures define different restrictions on placement,
permissions, alignment and other parameters for memory that can be used by
different subsystems that allocate executable memory, execmem APIs
take a type argument, that will be used to identify the calling subsystem
and to allow architectures to define parameters for ranges suitable for that
subsystem.

The new infrastructure

[PATCH v5 15/15] bpf: remove CONFIG_BPF_JIT dependency on CONFIG_MODULES of

2024-04-22 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

BPF just-in-time compiler depended on CONFIG_MODULES because it used
module_alloc() to allocate memory for the generated code.

Since code allocations are now implemented with execmem, drop dependency of
CONFIG_BPF_JIT on CONFIG_MODULES and make it select CONFIG_EXECMEM.

Suggested-by: Björn Töpel 
Signed-off-by: Mike Rapoport (IBM) 
---
 kernel/bpf/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/bpf/Kconfig b/kernel/bpf/Kconfig
index bc25f5098a25..f999e4e0b344 100644
--- a/kernel/bpf/Kconfig
+++ b/kernel/bpf/Kconfig
@@ -43,7 +43,7 @@ config BPF_JIT
bool "Enable BPF Just In Time compiler"
depends on BPF
depends on HAVE_CBPF_JIT || HAVE_EBPF_JIT
-   depends on MODULES
+   select EXECMEM
help
  BPF programs are normally handled by a BPF interpreter. This option
  allows the kernel to generate native code when a program is loaded
-- 
2.43.0



[PATCH v5 14/15] kprobes: remove dependency on CONFIG_MODULES

2024-04-22 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

kprobes depended on CONFIG_MODULES because it has to allocate memory for
code.

Since code allocations are now implemented with execmem, kprobes can be
enabled in non-modular kernels.

Add #ifdef CONFIG_MODULES guards for the code dealing with kprobes inside
modules, make CONFIG_KPROBES select CONFIG_EXECMEM and drop the
dependency of CONFIG_KPROBES on CONFIG_MODULES.

Signed-off-by: Mike Rapoport (IBM) 
---
 arch/Kconfig|  2 +-
 include/linux/module.h  |  9 ++
 kernel/kprobes.c| 55 +++--
 kernel/trace/trace_kprobe.c | 20 +-
 4 files changed, 63 insertions(+), 23 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index 7006f71f0110..a48ce6a488b3 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -52,9 +52,9 @@ config GENERIC_ENTRY
 
 config KPROBES
bool "Kprobes"
-   depends on MODULES
depends on HAVE_KPROBES
select KALLSYMS
+   select EXECMEM
select TASKS_RCU if PREEMPTION
help
  Kprobes allows you to trap at almost any kernel address and
diff --git a/include/linux/module.h b/include/linux/module.h
index 1153b0d99a80..ffa1c603163c 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -605,6 +605,11 @@ static inline bool module_is_live(struct module *mod)
return mod->state != MODULE_STATE_GOING;
 }
 
+static inline bool module_is_coming(struct module *mod)
+{
+return mod->state == MODULE_STATE_COMING;
+}
+
 struct module *__module_text_address(unsigned long addr);
 struct module *__module_address(unsigned long addr);
 bool is_module_address(unsigned long addr);
@@ -857,6 +862,10 @@ void *dereference_module_function_descriptor(struct module 
*mod, void *ptr)
return ptr;
 }
 
+static inline bool module_is_coming(struct module *mod)
+{
+   return false;
+}
 #endif /* CONFIG_MODULES */
 
 #ifdef CONFIG_SYSFS
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index ddd7cdc16edf..ca2c6cbd42d2 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1588,7 +1588,7 @@ static int check_kprobe_address_safe(struct kprobe *p,
}
 
/* Get module refcount and reject __init functions for loaded modules. 
*/
-   if (*probed_mod) {
+   if (IS_ENABLED(CONFIG_MODULES) && *probed_mod) {
/*
 * We must hold a refcount of the probed module while updating
 * its code to prohibit unexpected unloading.
@@ -1603,12 +1603,13 @@ static int check_kprobe_address_safe(struct kprobe *p,
 * kprobes in there.
 */
if (within_module_init((unsigned long)p->addr, *probed_mod) &&
-   (*probed_mod)->state != MODULE_STATE_COMING) {
+   !module_is_coming(*probed_mod)) {
module_put(*probed_mod);
*probed_mod = NULL;
ret = -ENOENT;
}
}
+
 out:
preempt_enable();
jump_label_unlock();
@@ -2488,24 +2489,6 @@ int kprobe_add_area_blacklist(unsigned long start, 
unsigned long end)
return 0;
 }
 
-/* Remove all symbols in given area from kprobe blacklist */
-static void kprobe_remove_area_blacklist(unsigned long start, unsigned long 
end)
-{
-   struct kprobe_blacklist_entry *ent, *n;
-
-   list_for_each_entry_safe(ent, n, _blacklist, list) {
-   if (ent->start_addr < start || ent->start_addr >= end)
-   continue;
-   list_del(>list);
-   kfree(ent);
-   }
-}
-
-static void kprobe_remove_ksym_blacklist(unsigned long entry)
-{
-   kprobe_remove_area_blacklist(entry, entry + 1);
-}
-
 int __weak arch_kprobe_get_kallsym(unsigned int *symnum, unsigned long *value,
   char *type, char *sym)
 {
@@ -2570,6 +2553,25 @@ static int __init populate_kprobe_blacklist(unsigned 
long *start,
return ret ? : arch_populate_kprobe_blacklist();
 }
 
+#ifdef CONFIG_MODULES
+/* Remove all symbols in given area from kprobe blacklist */
+static void kprobe_remove_area_blacklist(unsigned long start, unsigned long 
end)
+{
+   struct kprobe_blacklist_entry *ent, *n;
+
+   list_for_each_entry_safe(ent, n, _blacklist, list) {
+   if (ent->start_addr < start || ent->start_addr >= end)
+   continue;
+   list_del(>list);
+   kfree(ent);
+   }
+}
+
+static void kprobe_remove_ksym_blacklist(unsigned long entry)
+{
+   kprobe_remove_area_blacklist(entry, entry + 1);
+}
+
 static void add_module_kprobe_blacklist(struct module *mod)
 {
unsigned long start, end;
@@ -2672,6 +2674,17 @@ static struct notifier_block kprobe_module_nb = {
.priority = 0
 };
 
+static int kprobe_register_module_notifier(void)
+{
+   return register_module_no

[PATCH v5 13/15] powerpc: use CONFIG_EXECMEM instead of CONFIG_MODULES where appropriate

2024-04-22 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

There are places where CONFIG_MODULES guards the code that depends on
memory allocation being done with module_alloc().

Replace CONFIG_MODULES with CONFIG_EXECMEM in such places.

Signed-off-by: Mike Rapoport (IBM) 
---
 arch/powerpc/Kconfig | 2 +-
 arch/powerpc/include/asm/kasan.h | 2 +-
 arch/powerpc/kernel/head_8xx.S   | 4 ++--
 arch/powerpc/kernel/head_book3s_32.S | 6 +++---
 arch/powerpc/lib/code-patching.c | 2 +-
 arch/powerpc/mm/book3s32/mmu.c   | 2 +-
 6 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 1c4be3373686..2e586733a464 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -285,7 +285,7 @@ config PPC
select IOMMU_HELPER if PPC64
select IRQ_DOMAIN
select IRQ_FORCED_THREADING
-   select KASAN_VMALLOCif KASAN && MODULES
+   select KASAN_VMALLOCif KASAN && EXECMEM
select LOCK_MM_AND_FIND_VMA
select MMU_GATHER_PAGE_SIZE
select MMU_GATHER_RCU_TABLE_FREE
diff --git a/arch/powerpc/include/asm/kasan.h b/arch/powerpc/include/asm/kasan.h
index 365d2720097c..b5bbb94c51f6 100644
--- a/arch/powerpc/include/asm/kasan.h
+++ b/arch/powerpc/include/asm/kasan.h
@@ -19,7 +19,7 @@
 
 #define KASAN_SHADOW_SCALE_SHIFT   3
 
-#if defined(CONFIG_MODULES) && defined(CONFIG_PPC32)
+#if defined(CONFIG_EXECMEM) && defined(CONFIG_PPC32)
 #define KASAN_KERN_START   ALIGN_DOWN(PAGE_OFFSET - SZ_256M, SZ_256M)
 #else
 #define KASAN_KERN_START   PAGE_OFFSET
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 647b0b445e89..edc479a7c2bc 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -199,12 +199,12 @@ instruction_counter:
mfspr   r10, SPRN_SRR0  /* Get effective address of fault */
INVALIDATE_ADJACENT_PAGES_CPU15(r10, r11)
mtspr   SPRN_MD_EPN, r10
-#ifdef CONFIG_MODULES
+#ifdef CONFIG_EXECMEM
mfcrr11
compare_to_kernel_boundary r10, r10
 #endif
mfspr   r10, SPRN_M_TWB /* Get level 1 table */
-#ifdef CONFIG_MODULES
+#ifdef CONFIG_EXECMEM
blt+3f
rlwinm  r10, r10, 0, 20, 31
orisr10, r10, (swapper_pg_dir - PAGE_OFFSET)@ha
diff --git a/arch/powerpc/kernel/head_book3s_32.S 
b/arch/powerpc/kernel/head_book3s_32.S
index c1d89764dd22..57196883a00e 100644
--- a/arch/powerpc/kernel/head_book3s_32.S
+++ b/arch/powerpc/kernel/head_book3s_32.S
@@ -419,14 +419,14 @@ InstructionTLBMiss:
  */
/* Get PTE (linux-style) and check access */
mfspr   r3,SPRN_IMISS
-#ifdef CONFIG_MODULES
+#ifdef CONFIG_EXECMEM
lis r1, TASK_SIZE@h /* check if kernel address */
cmplw   0,r1,r3
 #endif
mfspr   r2, SPRN_SDR1
li  r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC
rlwinm  r2, r2, 28, 0xf000
-#ifdef CONFIG_MODULES
+#ifdef CONFIG_EXECMEM
li  r0, 3
bgt-112f
lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha   /* if kernel address, 
use */
@@ -442,7 +442,7 @@ InstructionTLBMiss:
andc.   r1,r1,r2/* check access & ~permission */
bne-InstructionAddressInvalid /* return if access not permitted */
/* Convert linux-style PTE to low word of PPC-style PTE */
-#ifdef CONFIG_MODULES
+#ifdef CONFIG_EXECMEM
rlwimi  r2, r0, 0, 31, 31   /* userspace ? -> PP lsb */
 #endif
ori r1, r1, 0xe06   /* clear out reserved bits */
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index c6ab46156cda..7af791446ddf 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -225,7 +225,7 @@ void __init poking_init(void)
 
 static unsigned long get_patch_pfn(void *addr)
 {
-   if (IS_ENABLED(CONFIG_MODULES) && is_vmalloc_or_module_addr(addr))
+   if (IS_ENABLED(CONFIG_EXECMEM) && is_vmalloc_or_module_addr(addr))
return vmalloc_to_pfn(addr);
else
return __pa_symbol(addr) >> PAGE_SHIFT;
diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c
index 100f999871bc..625fe7d08e06 100644
--- a/arch/powerpc/mm/book3s32/mmu.c
+++ b/arch/powerpc/mm/book3s32/mmu.c
@@ -184,7 +184,7 @@ unsigned long __init mmu_mapin_ram(unsigned long base, 
unsigned long top)
 
 static bool is_module_segment(unsigned long addr)
 {
-   if (!IS_ENABLED(CONFIG_MODULES))
+   if (!IS_ENABLED(CONFIG_EXECMEM))
return false;
if (addr < ALIGN_DOWN(MODULES_VADDR, SZ_256M))
return false;
-- 
2.43.0



[PATCH v5 12/15] x86/ftrace: enable dynamic ftrace without CONFIG_MODULES

2024-04-22 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

Dynamic ftrace must allocate memory for code and this was impossible
without CONFIG_MODULES.

With execmem separated from the modules code, execmem_alloc() is
available regardless of CONFIG_MODULES.

Remove dependency of dynamic ftrace on CONFIG_MODULES and make
CONFIG_DYNAMIC_FTRACE select CONFIG_EXECMEM in Kconfig.

Signed-off-by: Mike Rapoport (IBM) 
---
 arch/x86/Kconfig |  1 +
 arch/x86/kernel/ftrace.c | 10 --
 2 files changed, 1 insertion(+), 10 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 3f5ba72c9480..cd8addb96a0b 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -34,6 +34,7 @@ config X86_64
select SWIOTLB
select ARCH_HAS_ELFCORE_COMPAT
select ZONE_DMA32
+   select EXECMEM if DYNAMIC_FTRACE
 
 config FORCE_DYNAMIC_FTRACE
def_bool y
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index c8ddb7abda7c..8da0e66ca22d 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -261,8 +261,6 @@ void arch_ftrace_update_code(int command)
 /* Currently only x86_64 supports dynamic trampolines */
 #ifdef CONFIG_X86_64
 
-#ifdef CONFIG_MODULES
-/* Module allocation simplifies allocating memory for code */
 static inline void *alloc_tramp(unsigned long size)
 {
return execmem_alloc(EXECMEM_FTRACE, size);
@@ -271,14 +269,6 @@ static inline void tramp_free(void *tramp)
 {
execmem_free(tramp);
 }
-#else
-/* Trampolines can only be created if modules are supported */
-static inline void *alloc_tramp(unsigned long size)
-{
-   return NULL;
-}
-static inline void tramp_free(void *tramp) { }
-#endif
 
 /* Defined as markers to the end of the ftrace default trampolines */
 extern void ftrace_regs_caller_end(void);
-- 
2.43.0



[PATCH v5 11/15] arch: make execmem setup available regardless of CONFIG_MODULES

2024-04-22 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

execmem does not depend on modules; on the contrary, modules use
execmem.

To make execmem available when CONFIG_MODULES=n, for instance for
kprobes, split execmem_params initialization out from
arch/*/kernel/module.c and compile it when CONFIG_EXECMEM=y.

Signed-off-by: Mike Rapoport (IBM) 
---
 arch/arm/kernel/module.c   |  43 --
 arch/arm/mm/init.c |  45 +++
 arch/arm64/kernel/module.c | 140 -
 arch/arm64/mm/init.c   | 140 +
 arch/loongarch/kernel/module.c |  19 -
 arch/loongarch/mm/init.c   |  21 +
 arch/mips/kernel/module.c  |  22 --
 arch/mips/mm/init.c|  23 ++
 arch/nios2/kernel/module.c |  20 -
 arch/nios2/mm/init.c   |  21 +
 arch/parisc/kernel/module.c|  20 -
 arch/parisc/mm/init.c  |  23 +-
 arch/powerpc/kernel/module.c   |  63 ---
 arch/powerpc/mm/mem.c  |  64 +++
 arch/riscv/kernel/module.c |  44 ---
 arch/riscv/mm/init.c   |  45 +++
 arch/s390/kernel/module.c  |  27 ---
 arch/s390/mm/init.c|  30 +++
 arch/sparc/kernel/module.c |  19 -
 arch/sparc/mm/Makefile |   2 +
 arch/sparc/mm/execmem.c|  21 +
 arch/x86/kernel/module.c   |  27 ---
 arch/x86/mm/init.c |  29 +++
 23 files changed, 463 insertions(+), 445 deletions(-)
 create mode 100644 arch/sparc/mm/execmem.c

diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c
index a98fdf6ff26c..677f218f7e84 100644
--- a/arch/arm/kernel/module.c
+++ b/arch/arm/kernel/module.c
@@ -12,57 +12,14 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
-#include 
-#include 
 
 #include 
 #include 
 #include 
 #include 
 
-#ifdef CONFIG_XIP_KERNEL
-/*
- * The XIP kernel text is mapped in the module area for modules and
- * some other stuff to work without any indirect relocations.
- * MODULES_VADDR is redefined here and not in asm/memory.h to avoid
- * recompiling the whole kernel when CONFIG_XIP_KERNEL is turned on/off.
- */
-#undef MODULES_VADDR
-#define MODULES_VADDR  (((unsigned long)_exiprom + ~PMD_MASK) & PMD_MASK)
-#endif
-
-#ifdef CONFIG_MMU
-static struct execmem_info execmem_info __ro_after_init;
-
-struct execmem_info __init *execmem_arch_setup(void)
-{
-   unsigned long fallback_start = 0, fallback_end = 0;
-
-   if (IS_ENABLED(CONFIG_ARM_MODULE_PLTS)) {
-   fallback_start = VMALLOC_START;
-   fallback_end = VMALLOC_END;
-   }
-
-   execmem_info = (struct execmem_info){
-   .ranges = {
-   [EXECMEM_DEFAULT] = {
-   .start  = MODULES_VADDR,
-   .end= MODULES_END,
-   .pgprot = PAGE_KERNEL_EXEC,
-   .alignment = 1,
-   .fallback_start = fallback_start,
-   .fallback_end   = fallback_end,
-   },
-   },
-   };
-
-   return _info;
-}
-#endif
-
 bool module_init_section(const char *name)
 {
return strstarts(name, ".init") ||
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index e8c6f4be0ce1..5345d218899a 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -22,6 +22,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -486,3 +487,47 @@ void free_initrd_mem(unsigned long start, unsigned long 
end)
free_reserved_area((void *)start, (void *)end, -1, "initrd");
 }
 #endif
+
+#ifdef CONFIG_EXECMEM
+
+#ifdef CONFIG_XIP_KERNEL
+/*
+ * The XIP kernel text is mapped in the module area for modules and
+ * some other stuff to work without any indirect relocations.
+ * MODULES_VADDR is redefined here and not in asm/memory.h to avoid
+ * recompiling the whole kernel when CONFIG_XIP_KERNEL is turned on/off.
+ */
+#undef MODULES_VADDR
+#define MODULES_VADDR  (((unsigned long)_exiprom + ~PMD_MASK) & PMD_MASK)
+#endif
+
+#ifdef CONFIG_MMU
+static struct execmem_info execmem_info __ro_after_init;
+
+struct execmem_info __init *execmem_arch_setup(void)
+{
+   unsigned long fallback_start = 0, fallback_end = 0;
+
+   if (IS_ENABLED(CONFIG_ARM_MODULE_PLTS)) {
+   fallback_start = VMALLOC_START;
+   fallback_end = VMALLOC_END;
+   }
+
+   execmem_info = (struct execmem_info){
+   .ranges = {
+   [EXECMEM_DEFAULT] = {
+   .start  = MODULES_VADDR,
+   .end= MODULES_END,
+   .pgprot = PAGE_KERNEL_EXEC,
+   .alignment = 1,
+   .fallback_start = fallback_start,
+   

[PATCH v5 10/15] powerpc: extend execmem_params for kprobes allocations

2024-04-22 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

powerpc overrides kprobes::alloc_insn_page() to remove writable
permissions when STRICT_MODULE_RWX is on.

Add definition of EXECMEM_KPROBES to execmem_params to allow using the
generic kprobes::alloc_insn_page() with the desired permissions.

As powerpc uses breakpoint instructions to inject kprobes, it does not
need to constrain kprobe allocations to the modules area and can use the
entire vmalloc address space.

Signed-off-by: Mike Rapoport (IBM) 
---
 arch/powerpc/kernel/kprobes.c | 20 
 arch/powerpc/kernel/module.c  |  7 +++
 2 files changed, 7 insertions(+), 20 deletions(-)

diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index 9fcd01bb2ce6..14c5ddec3056 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -126,26 +126,6 @@ kprobe_opcode_t *arch_adjust_kprobe_addr(unsigned long 
addr, unsigned long offse
return (kprobe_opcode_t *)(addr + offset);
 }
 
-void *alloc_insn_page(void)
-{
-   void *page;
-
-   page = execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE);
-   if (!page)
-   return NULL;
-
-   if (strict_module_rwx_enabled()) {
-   int err = set_memory_rox((unsigned long)page, 1);
-
-   if (err)
-   goto error;
-   }
-   return page;
-error:
-   execmem_free(page);
-   return NULL;
-}
-
 int arch_prepare_kprobe(struct kprobe *p)
 {
int ret = 0;
diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c
index ac80559015a3..2a23cf7e141b 100644
--- a/arch/powerpc/kernel/module.c
+++ b/arch/powerpc/kernel/module.c
@@ -94,6 +94,7 @@ static struct execmem_info execmem_info __ro_after_init;
 
 struct execmem_info __init *execmem_arch_setup(void)
 {
+   pgprot_t kprobes_prot = strict_module_rwx_enabled() ? PAGE_KERNEL_ROX : 
PAGE_KERNEL_EXEC;
pgprot_t prot = strict_module_rwx_enabled() ? PAGE_KERNEL : 
PAGE_KERNEL_EXEC;
unsigned long fallback_start = 0, fallback_end = 0;
unsigned long start, end;
@@ -132,6 +133,12 @@ struct execmem_info __init *execmem_arch_setup(void)
.fallback_start = fallback_start,
.fallback_end   = fallback_end,
},
+   [EXECMEM_KPROBES] = {
+   .start  = VMALLOC_START,
+   .end= VMALLOC_END,
+   .pgprot = kprobes_prot,
+   .alignment = 1,
+   },
[EXECMEM_MODULE_DATA] = {
.start  = VMALLOC_START,
.end= VMALLOC_END,
-- 
2.43.0



[PATCH v5 09/15] riscv: extend execmem_params for generated code allocations

2024-04-22 Thread Mike Rapoport
From: "Mike Rapoport (IBM)" 

The memory allocations for kprobes and BPF on RISC-V are not placed in
the modules area and these custom allocations are implemented with
overrides of alloc_insn_page() and bpf_jit_alloc_exec().

Slightly reorder execmem_params initialization to support both 32 and 64
bit variants, define EXECMEM_KPROBES and EXECMEM_BPF ranges in
riscv::execmem_params and drop overrides of alloc_insn_page() and
bpf_jit_alloc_exec().

Signed-off-by: Mike Rapoport (IBM) 
Reviewed-by: Alexandre Ghiti 
---
 arch/riscv/kernel/module.c | 28 +---
 arch/riscv/kernel/probes/kprobes.c | 10 --
 arch/riscv/net/bpf_jit_core.c  | 13 -
 3 files changed, 25 insertions(+), 26 deletions(-)

diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c
index 182904127ba0..2ecbacbc9993 100644
--- a/arch/riscv/kernel/module.c
+++ b/arch/riscv/kernel/module.c
@@ -906,19 +906,41 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char 
*strtab,
return 0;
 }
 
-#if defined(CONFIG_MMU) && defined(CONFIG_64BIT)
+#ifdef CONFIG_MMU
 static struct execmem_info execmem_info __ro_after_init;
 
 struct execmem_info __init *execmem_arch_setup(void)
 {
+   unsigned long start, end;
+
+   if (IS_ENABLED(CONFIG_64BIT)) {
+   start = MODULES_VADDR;
+   end = MODULES_END;
+   } else {
+   start = VMALLOC_START;
+   end = VMALLOC_END;
+   }
+
execmem_info = (struct execmem_info){
.ranges = {
[EXECMEM_DEFAULT] = {
-   .start  = MODULES_VADDR,
-   .end= MODULES_END,
+   .start  = start,
+   .end= end,
.pgprot = PAGE_KERNEL,
.alignment = 1,
},
+   [EXECMEM_KPROBES] = {
+   .start  = VMALLOC_START,
+   .end= VMALLOC_END,
+   .pgprot = PAGE_KERNEL_READ_EXEC,
+   .alignment = 1,
+   },
+   [EXECMEM_BPF] = {
+   .start  = BPF_JIT_REGION_START,
+   .end= BPF_JIT_REGION_END,
+   .pgprot = PAGE_KERNEL,
+   .alignment = PAGE_SIZE,
+   },
},
};
 
diff --git a/arch/riscv/kernel/probes/kprobes.c 
b/arch/riscv/kernel/probes/kprobes.c
index 2f08c14a933d..e64f2f3064eb 100644
--- a/arch/riscv/kernel/probes/kprobes.c
+++ b/arch/riscv/kernel/probes/kprobes.c
@@ -104,16 +104,6 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
return 0;
 }
 
-#ifdef CONFIG_MMU
-void *alloc_insn_page(void)
-{
-   return  __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, VMALLOC_END,
-GFP_KERNEL, PAGE_KERNEL_READ_EXEC,
-VM_FLUSH_RESET_PERMS, NUMA_NO_NODE,
-__builtin_return_address(0));
-}
-#endif
-
 /* install breakpoint in text */
 void __kprobes arch_arm_kprobe(struct kprobe *p)
 {
diff --git a/arch/riscv/net/bpf_jit_core.c b/arch/riscv/net/bpf_jit_core.c
index 6b3acac30c06..e238fdbd5dbc 100644
--- a/arch/riscv/net/bpf_jit_core.c
+++ b/arch/riscv/net/bpf_jit_core.c
@@ -219,19 +219,6 @@ u64 bpf_jit_alloc_exec_limit(void)
return BPF_JIT_REGION_SIZE;
 }
 
-void *bpf_jit_alloc_exec(unsigned long size)
-{
-   return __vmalloc_node_range(size, PAGE_SIZE, BPF_JIT_REGION_START,
-   BPF_JIT_REGION_END, GFP_KERNEL,
-   PAGE_KERNEL, 0, NUMA_NO_NODE,
-   __builtin_return_address(0));
-}
-
-void bpf_jit_free_exec(void *addr)
-{
-   return vfree(addr);
-}
-
 void *bpf_arch_text_copy(void *dst, void *src, size_t len)
 {
int ret;
-- 
2.43.0



  1   2   3   4   5   6   7   8   9   10   >