[PATCH v14 09/11] x86, arm64: Add ARCH_WANT_RESERVE_CRASH_KERNEL config
We make the functions reserve_crashkernel[_low]() as generic for x86 and arm64. Since reserve_crashkernel[_low]() implementations are quite similar on other architectures as well, we can have more users of this later. So have CONFIG_ARCH_WANT_RESERVE_CRASH_KERNEL in arch/Kconfig and select this by X86 and ARM64. Suggested-by: Mike Rapoport Suggested-by: Baoquan He Signed-off-by: Chen Zhou --- arch/Kconfig| 3 +++ arch/arm64/Kconfig | 1 + arch/x86/Kconfig| 2 ++ kernel/crash_core.c | 7 ++- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index 24862d15f3a3..0ca1ff5bb157 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -24,6 +24,9 @@ config KEXEC_ELF config HAVE_IMA_KEXEC bool +config ARCH_WANT_RESERVE_CRASH_KERNEL + bool + config SET_FS bool diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index f39568b28ec1..09365c7ff469 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -82,6 +82,7 @@ config ARM64 select ARCH_WANT_FRAME_POINTERS select ARCH_WANT_HUGE_PMD_SHARE if ARM64_4K_PAGES || (ARM64_16K_PAGES && !ARM64_VA_BITS_36) select ARCH_WANT_LD_ORPHAN_WARN + select ARCH_WANT_RESERVE_CRASH_KERNEL if KEXEC_CORE select ARCH_HAS_UBSAN_SANITIZE_ALL select ARM_AMBA select ARM_ARCH_TIMER diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 21f851179ff0..e6926fcb4a40 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -12,6 +12,7 @@ config X86_32 depends on !64BIT # Options that are inherently 32-bit kernel only: select ARCH_WANT_IPC_PARSE_VERSION + select ARCH_WANT_RESERVE_CRASH_KERNEL if KEXEC_CORE select CLKSRC_I8253 select CLONE_BACKWARDS select GENERIC_VDSO_32 @@ -28,6 +29,7 @@ config X86_64 select ARCH_HAS_GIGANTIC_PAGE select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 select ARCH_USE_CMPXCHG_LOCKREF + select ARCH_WANT_RESERVE_CRASH_KERNEL if KEXEC_CORE select HAVE_ARCH_SOFT_DIRTY select MODULES_USE_ELF_RELA select NEED_DMA_MAP_STATE diff --git a/kernel/crash_core.c b/kernel/crash_core.c index 8479be270c0b..2c5783985db5 
100644 --- a/kernel/crash_core.c +++ b/kernel/crash_core.c @@ -320,9 +320,7 @@ int __init parse_crashkernel_low(char *cmdline, * - Crashkernel reservation -- */ -#ifdef CONFIG_KEXEC_CORE - -#if defined(CONFIG_X86) || defined(CONFIG_ARM64) +#ifdef CONFIG_ARCH_WANT_RESERVE_CRASH_KERNEL static int __init reserve_crashkernel_low(void) { #ifdef CONFIG_64BIT @@ -450,8 +448,7 @@ void __init reserve_crashkernel(void) crashk_res.start = crash_base; crashk_res.end = crash_base + crash_size - 1; } -#endif -#endif /* CONFIG_KEXEC_CORE */ +#endif /* CONFIG_ARCH_WANT_RESERVE_CRASH_KERNEL */ Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type, void *data, size_t data_len) -- 2.20.1
[PATCH v14 06/11] x86/elf: Move vmcore_elf_check_arch_cross to arch/x86/include/asm/elf.h
Move macro vmcore_elf_check_arch_cross from arch/x86/include/asm/kexec.h to arch/x86/include/asm/elf.h to fix the following compiling warning: make ARCH=i386 In file included from arch/x86/kernel/setup.c:39:0: ./arch/x86/include/asm/kexec.h:77:0: warning: "vmcore_elf_check_arch_cross" redefined # define vmcore_elf_check_arch_cross(x) ((x)->e_machine == EM_X86_64) In file included from arch/x86/kernel/setup.c:9:0: ./include/linux/crash_dump.h:39:0: note: this is the location of the previous definition #define vmcore_elf_check_arch_cross(x) 0 The root cause is that vmcore_elf_check_arch_cross under CONFIG_CRASH_CORE depend on CONFIG_KEXEC_CORE. Commit 2db65f1db17d ("x86: kdump: move reserve_crashkernel[_low]() into crash_core.c") triggered the issue. Suggested by Mike, simply move vmcore_elf_check_arch_cross from arch/x86/include/asm/kexec.h to arch/x86/include/asm/elf.h to fix the warning. Fixes: 2db65f1db17d ("x86: kdump: move reserve_crashkernel[_low]() into crash_core.c") Reported-by: kernel test robot Suggested-by: Mike Rapoport Signed-off-by: Chen Zhou --- arch/x86/include/asm/elf.h | 3 +++ arch/x86/include/asm/kexec.h | 3 --- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 66bdfe838d61..5333777cc758 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -94,6 +94,9 @@ extern unsigned int vdso32_enabled; #define elf_check_arch(x) elf_check_arch_ia32(x) +/* We can also handle crash dumps from 64 bit kernel. */ +# define vmcore_elf_check_arch_cross(x) ((x)->e_machine == EM_X86_64) + /* SVR4/i386 ABI (pages 3-31, 3-32) says that when the program starts %edx contains a pointer to a function which might be registered using `atexit'. 
This provides a mean for the dynamic linker to call DT_FINI functions for diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h index 2b18f918203e..6fcae01a9cca 100644 --- a/arch/x86/include/asm/kexec.h +++ b/arch/x86/include/asm/kexec.h @@ -72,9 +72,6 @@ struct kimage; /* The native architecture */ # define KEXEC_ARCH KEXEC_ARCH_386 - -/* We can also handle crash dumps from 64 bit kernel. */ -# define vmcore_elf_check_arch_cross(x) ((x)->e_machine == EM_X86_64) #else /* Maximum physical address we can use pages from */ # define KEXEC_SOURCE_MEMORY_LIMIT (MAXMEM-1) -- 2.20.1
[PATCH v14 02/11] x86: kdump: make the lower bound of crash kernel reservation consistent
The lower bounds of the crash kernel reservation and the crash kernel low reservation are different; use the consistent value CRASH_ALIGN for both. Suggested-by: Dave Young Signed-off-by: Chen Zhou Tested-by: John Donnelly --- arch/x86/kernel/setup.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index da769845597d..27470479e4a3 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -439,7 +439,8 @@ static int __init reserve_crashkernel_low(void) return 0; } - low_base = memblock_phys_alloc_range(low_size, CRASH_ALIGN, 0, CRASH_ADDR_LOW_MAX); + low_base = memblock_phys_alloc_range(low_size, CRASH_ALIGN, CRASH_ALIGN, + CRASH_ADDR_LOW_MAX); if (!low_base) { pr_err("Cannot reserve %ldMB crashkernel low memory, please try smaller size.\n", (unsigned long)(low_size >> 20)); -- 2.20.1
[PATCH v14 00/11] support reserving crashkernel above 4G on arm64 kdump
Put "move reserve_crashkernel_low() into kexec_core.c" in a separate patch. Changes since [v1]: - Move common reserve_crashkernel_low() code into kernel/kexec_core.c. - Remove memblock_cap_memory_ranges() i added in v1 and implement that in fdt_enforce_memory_region(). There are at most two crash kernel regions, for two crash kernel regions case, we cap the memory range [min(regs[*].start), max(regs[*].end)] and then remove the memory range in the middle. [1]: http://lists.infradead.org/pipermail/kexec/2020-June/020737.html [2]: https://github.com/robherring/dt-schema/pull/19 [v1]: https://lkml.org/lkml/2019/4/2/1174 [v2]: https://lkml.org/lkml/2019/4/9/86 [v3]: https://lkml.org/lkml/2019/4/9/306 [v4]: https://lkml.org/lkml/2019/4/15/273 [v5]: https://lkml.org/lkml/2019/5/6/1360 [v6]: https://lkml.org/lkml/2019/8/30/142 [v7]: https://lkml.org/lkml/2019/12/23/411 [v8]: https://lkml.org/lkml/2020/5/21/213 [v9]: https://lkml.org/lkml/2020/6/28/73 [v10]: https://lkml.org/lkml/2020/7/2/1443 [v11]: https://lkml.org/lkml/2020/8/1/150 [v12]: https://lkml.org/lkml/2020/9/7/1037 [v13]: https://lkml.org/lkml/2020/10/31/34 Chen Zhou (11): x86: kdump: replace the hard-coded alignment with macro CRASH_ALIGN x86: kdump: make the lower bound of crash kernel reservation consistent x86: kdump: use macro CRASH_ADDR_LOW_MAX in functions reserve_crashkernel() x86: kdump: move xen_pv_domain() check and insert_resource() to setup_arch() x86: kdump: move reserve_crashkernel[_low]() into crash_core.c x86/elf: Move vmcore_elf_check_arch_cross to arch/x86/include/asm/elf.h arm64: kdump: introduce some macroes for crash kernel reservation arm64: kdump: reimplement crashkernel=X x86, arm64: Add ARCH_WANT_RESERVE_CRASH_KERNEL config arm64: kdump: add memory for devices by DT property linux,usable-memory-range kdump: update Documentation about crashkernel Documentation/admin-guide/kdump/kdump.rst | 22 ++- .../admin-guide/kernel-parameters.txt | 11 +- arch/Kconfig | 3 + arch/arm64/Kconfig| 1 + 
arch/arm64/include/asm/kexec.h| 10 ++ arch/arm64/kernel/setup.c | 13 +- arch/arm64/mm/init.c | 111 +--- arch/x86/Kconfig | 2 + arch/x86/include/asm/elf.h| 3 + arch/x86/include/asm/kexec.h | 31 +++- arch/x86/kernel/setup.c | 163 ++ include/linux/crash_core.h| 3 + include/linux/kexec.h | 2 - kernel/crash_core.c | 156 + kernel/kexec_core.c | 17 -- 15 files changed, 303 insertions(+), 245 deletions(-) -- 2.20.1
[PATCH v14 11/11] kdump: update Documentation about crashkernel
For arm64, the behavior of crashkernel=X has been changed: it now tries low allocation in the DMA zone and falls back to high allocation if that fails. We can also use "crashkernel=X,high" to select a high region above the DMA zone, which also tries to allocate at least 256M of low memory in the DMA zone automatically, and "crashkernel=Y,low" can be used to allocate a specified size of low memory. So update the Documentation. Signed-off-by: Chen Zhou Tested-by: John Donnelly --- Documentation/admin-guide/kdump/kdump.rst | 22 --- .../admin-guide/kernel-parameters.txt | 11 -- 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/Documentation/admin-guide/kdump/kdump.rst b/Documentation/admin-guide/kdump/kdump.rst index 75a9dd98e76e..0877c76f8015 100644 --- a/Documentation/admin-guide/kdump/kdump.rst +++ b/Documentation/admin-guide/kdump/kdump.rst @@ -299,7 +299,16 @@ Boot into System Kernel "crashkernel=64M@16M" tells the system kernel to reserve 64 MB of memory starting at physical address 0x01000000 (16MB) for the dump-capture kernel. - On x86 and x86_64, use "crashkernel=64M@16M". + On x86 use "crashkernel=64M@16M". + + On x86_64, use "crashkernel=X" to select a region under 4G first, and + fall back to reserve region above 4G. And go for high allocation + directly if the required size is too large. + We can also use "crashkernel=X,high" to select a region above 4G, which + also tries to allocate at least 256M below 4G automatically and + "crashkernel=Y,low" can be used to allocate specified size low memory. + Use "crashkernel=Y@X" if you really have to reserve memory from specified + start address X. On ppc64, use "crashkernel=128M@32M". @@ -316,8 +325,15 @@ Boot into System Kernel kernel will automatically locate the crash kernel image within the first 512MB of RAM if X is not given. - On arm64, use "crashkernel=Y[@X]". Note that the start address of - the kernel, X if explicitly specified, must be aligned to 2MiB (0x200000). 
+ On arm64, use "crashkernel=X" to try low allocation in DMA zone and + fall back to high allocation if it fails. + We can also use "crashkernel=X,high" to select a high region above + DMA zone, which also tries to allocate at least 256M low memory in + DMA zone automatically. + "crashkernel=Y,low" can be used to allocate specified size low memory. + Use "crashkernel=Y@X" if you really have to reserve memory from + specified start address X. Note that the start address of the kernel, + X if explicitly specified, must be aligned to 2MiB (0x200000). Load the Dump-capture Kernel diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index a10b545c2070..908e5c8b61ba 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -738,6 +738,9 @@ [KNL, X86-64] Select a region under 4G first, and fall back to reserve region above 4G when '@offset' hasn't been specified. + [KNL, arm64] Try low allocation in DMA zone and fall back + to high allocation if it fails when '@offset' hasn't been + specified. See Documentation/admin-guide/kdump/kdump.rst for further details. crashkernel=range1:size1[,range2:size2,...][@offset] @@ -754,6 +757,8 @@ Otherwise memory region will be allocated below 4G, if available. It will be ignored if crashkernel=X is specified. + [KNL, arm64] range in high memory. + Allow kernel to allocate physical memory region from top. crashkernel=size[KMG],low [KNL, X86-64] range under 4G. When crashkernel=X,high is passed, kernel could allocate physical memory region @@ -762,13 +767,15 @@ requires at least 64M+32K low memory, also enough extra low memory is needed to make sure DMA buffers for 32-bit devices won't run out. Kernel would try to allocate at - at least 256M below 4G automatically. + least 256M below 4G automatically. This one let user to specify own low range under 4G for second kernel instead. 0: to disable low allocation. 
It will be ignored when crashkernel=X,high is not used or memory reserved is below 4G. - + [KNL, arm64] range in low memory. + This
[PATCH v14 05/11] x86: kdump: move reserve_crashkernel[_low]() into crash_core.c
Make the functions reserve_crashkernel[_low]() as generic. Arm64 will use these to reimplement crashkernel=X. Signed-off-by: Chen Zhou Tested-by: John Donnelly --- arch/x86/include/asm/kexec.h | 25 ++ arch/x86/kernel/setup.c | 143 +-- include/linux/crash_core.h | 3 + include/linux/kexec.h| 2 - kernel/crash_core.c | 159 +++ kernel/kexec_core.c | 17 6 files changed, 189 insertions(+), 160 deletions(-) diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h index be18dc7ae51f..2b18f918203e 100644 --- a/arch/x86/include/asm/kexec.h +++ b/arch/x86/include/asm/kexec.h @@ -21,6 +21,27 @@ /* 16M alignment for crash kernel regions */ #define CRASH_ALIGNSZ_16M +/* + * Keep the crash kernel below this limit. + * + * Earlier 32-bits kernels would limit the kernel to the low 512 MB range + * due to mapping restrictions. + * + * 64-bit kdump kernels need to be restricted to be under 64 TB, which is + * the upper limit of system RAM in 4-level paging mode. Since the kdump + * jump could be from 5-level paging to 4-level paging, the jump will fail if + * the kernel is put above 64 TB, and during the 1st kernel bootup there's + * no good way to detect the paging mode of the target kernel which will be + * loaded for dumping. 
+ */ +#ifdef CONFIG_X86_32 +# define CRASH_ADDR_LOW_MAXSZ_512M +# define CRASH_ADDR_HIGH_MAX SZ_512M +#else +# define CRASH_ADDR_LOW_MAXSZ_4G +# define CRASH_ADDR_HIGH_MAX SZ_64T +#endif + #ifndef __ASSEMBLY__ #include @@ -200,6 +221,10 @@ typedef void crash_vmclear_fn(void); extern crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss; extern void kdump_nmi_shootdown_cpus(void); +#ifdef CONFIG_KEXEC_CORE +extern void __init reserve_crashkernel(void); +#endif + #endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_KEXEC_H */ diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 5d676efc32f6..d136d6ad3fa8 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -384,147 +385,7 @@ static void __init memblock_x86_reserve_range_setup_data(void) } } -/* - * - Crashkernel reservation -- - */ - -#ifdef CONFIG_KEXEC_CORE - -/* - * Keep the crash kernel below this limit. - * - * Earlier 32-bits kernels would limit the kernel to the low 512 MB range - * due to mapping restrictions. - * - * 64-bit kdump kernels need to be restricted to be under 64 TB, which is - * the upper limit of system RAM in 4-level paging mode. Since the kdump - * jump could be from 5-level paging to 4-level paging, the jump will fail if - * the kernel is put above 64 TB, and during the 1st kernel bootup there's - * no good way to detect the paging mode of the target kernel which will be - * loaded for dumping. 
- */ -#ifdef CONFIG_X86_32 -# define CRASH_ADDR_LOW_MAXSZ_512M -# define CRASH_ADDR_HIGH_MAX SZ_512M -#else -# define CRASH_ADDR_LOW_MAXSZ_4G -# define CRASH_ADDR_HIGH_MAX SZ_64T -#endif - -static int __init reserve_crashkernel_low(void) -{ -#ifdef CONFIG_X86_64 - unsigned long long base, low_base = 0, low_size = 0; - unsigned long low_mem_limit; - int ret; - - low_mem_limit = min(memblock_phys_mem_size(), CRASH_ADDR_LOW_MAX); - - /* crashkernel=Y,low */ - ret = parse_crashkernel_low(boot_command_line, low_mem_limit, _size, ); - if (ret) { - /* -* two parts from kernel/dma/swiotlb.c: -* -swiotlb size: user-specified with swiotlb= or default. -* -* -swiotlb overflow buffer: now hardcoded to 32k. We round it -* to 8M for other buffers that may need to stay low too. Also -* make sure we allocate enough extra low memory so that we -* don't run out of DMA buffers for 32-bit devices. -*/ - low_size = max(swiotlb_size_or_default() + (8UL << 20), 256UL << 20); - } else { - /* passed with crashkernel=0,low ? */ - if (!low_size) - return 0; - } - - low_base = memblock_phys_alloc_range(low_size, CRASH_ALIGN, CRASH_ALIGN, - CRASH_ADDR_LOW_MAX); - if (!low_base) { - pr_err("Cannot reserve %ldMB crashkernel low memory, please try smaller size.\n", - (unsigned long)(low_size >> 20)); - return -ENOMEM; - } - - pr_info("Reserving %ldMB of low memory at %ldMB for crashkernel (low RAM limit: %ldMB)\n", - (unsigned long)(low_size >> 20), - (unsigned long)(low_base >> 20), - (unsigned long)(low_mem_limit >> 20)); - - crashk_low_res.start = low_base; -
[PATCH v14 03/11] x86: kdump: use macro CRASH_ADDR_LOW_MAX in functions reserve_crashkernel()
To make the function reserve_crashkernel() generic, replace some hard-coded numbers with the macro CRASH_ADDR_LOW_MAX. Signed-off-by: Chen Zhou Tested-by: John Donnelly --- arch/x86/kernel/setup.c | 7 --- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 27470479e4a3..086a04235be4 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -487,8 +487,9 @@ static void __init reserve_crashkernel(void) if (!crash_base) { /* * Set CRASH_ADDR_LOW_MAX upper bound for crash memory, -* crashkernel=x,high reserves memory over 4G, also allocates -* 256M extra low memory for DMA buffers and swiotlb. +* crashkernel=x,high reserves memory over CRASH_ADDR_LOW_MAX, +* also allocates 256M extra low memory for DMA buffers +* and swiotlb. * But the extra memory is not required for all machines. * So try low memory first and fall back to high memory * unless "crashkernel=size[KMG],high" is specified. @@ -516,7 +517,7 @@ static void __init reserve_crashkernel(void) } } - if (crash_base >= (1ULL << 32) && reserve_crashkernel_low()) { + if (crash_base >= CRASH_ADDR_LOW_MAX && reserve_crashkernel_low()) { memblock_free(crash_base, crash_size); return; } -- 2.20.1
[PATCH v14 08/11] arm64: kdump: reimplement crashkernel=X
There are following issues in arm64 kdump: 1. We use crashkernel=X to reserve crashkernel below 4G, which will fail when there is no enough low memory. 2. If reserving crashkernel above 4G, in this case, crash dump kernel will boot failure because there is no low memory available for allocation. To solve these issues, change the behavior of crashkernel=X and introduce crashkernel=X,[high,low]. crashkernel=X tries low allocation in DMA zone, and fall back to high allocation if it fails. We can also use "crashkernel=X,high" to select a region above DMA zone, which also tries to allocate at least 256M in DMA zone automatically. "crashkernel=Y,low" can be used to allocate specified size low memory. Another minor change, there may be two regions reserved for crash dump kernel, in order to distinct from the high region and make no effect to the use of existing kexec-tools, rename the low region as "Crash kernel (low)". Signed-off-by: Chen Zhou Tested-by: John Donnelly --- arch/arm64/include/asm/kexec.h | 4 ++ arch/arm64/kernel/setup.c | 13 ++- arch/arm64/mm/init.c | 68 ++ kernel/crash_core.c| 6 +-- 4 files changed, 30 insertions(+), 61 deletions(-) diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h index 3f6ecae0bc68..f0caed0cb5e1 100644 --- a/arch/arm64/include/asm/kexec.h +++ b/arch/arm64/include/asm/kexec.h @@ -96,6 +96,10 @@ static inline void crash_prepare_suspend(void) {} static inline void crash_post_resume(void) {} #endif +#ifdef CONFIG_KEXEC_CORE +extern void __init reserve_crashkernel(void); +#endif + #ifdef CONFIG_KEXEC_FILE #define ARCH_HAS_KIMAGE_ARCH diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index c18aacde8bb0..69c592c546de 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -238,7 +238,18 @@ static void __init request_standard_resources(void) kernel_data.end <= res->end) request_resource(res, _data); #ifdef CONFIG_KEXEC_CORE - /* Userspace will find "Crash kernel" region in 
/proc/iomem. */ + /* +* Userspace will find "Crash kernel" or "Crash kernel (low)" +* region in /proc/iomem. +* In order to distinct from the high region and make no effect +* to the use of existing kexec-tools, rename the low region as +* "Crash kernel (low)". +*/ + if (crashk_low_res.end && crashk_low_res.start >= res->start && + crashk_low_res.end <= res->end) { + crashk_low_res.name = "Crash kernel (low)"; + request_resource(res, _low_res); + } if (crashk_res.end && crashk_res.start >= res->start && crashk_res.end <= res->end) request_resource(res, _res); diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 912f64f505f7..d20f5c444ebf 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -61,66 +62,11 @@ EXPORT_SYMBOL(memstart_addr); */ phys_addr_t arm64_dma_phys_limit __ro_after_init; -#ifdef CONFIG_KEXEC_CORE -/* - * reserve_crashkernel() - reserves memory for crash kernel - * - * This function reserves memory area given in "crashkernel=" kernel command - * line parameter. The memory reserved is used by dump capture kernel when - * primary kernel is crashing. - */ +#ifndef CONFIG_KEXEC_CORE static void __init reserve_crashkernel(void) { - unsigned long long crash_base, crash_size; - int ret; - - ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), - _size, _base); - /* no crashkernel= or invalid value specified */ - if (ret || !crash_size) - return; - - crash_size = PAGE_ALIGN(crash_size); - - if (crash_base == 0) { - /* Current arm64 boot protocol requires 2MB alignment */ - crash_base = memblock_find_in_range(CRASH_ALIGN, CRASH_ADDR_LOW_MAX, - crash_size, CRASH_ALIGN); - if (crash_base == 0) { - pr_warn("cannot allocate crashkernel (size:0x%llx)\n", - crash_size); - return; - } - } else { - /* User specifies base address explicitly. 
*/ - if (!memblock_is_region_memory(crash_base, crash_size)) { - pr_warn("cannot reserve crashkernel: region is not memory\n"); - return; - }
[PATCH v14 10/11] arm64: kdump: add memory for devices by DT property linux,usable-memory-range
When reserving crashkernel in high memory, some low memory is reserved for crash dump kernel devices and never mapped by the first kernel. This memory range is advertised to the crash dump kernel via a DT property under /chosen, linux,usable-memory-range = <BASE1 SIZE1 [BASE2 SIZE2]>. We reuse the DT property linux,usable-memory-range and make the low memory region the second range "BASE2 SIZE2", which keeps compatibility with existing user-space and older kdump kernels. The crash dump kernel reads this property at boot time and calls memblock_add() to add the low memory region after memblock_cap_memory_range() has been called. Signed-off-by: Chen Zhou Tested-by: John Donnelly --- arch/arm64/mm/init.c | 43 +-- 1 file changed, 33 insertions(+), 10 deletions(-) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index d20f5c444ebf..180a25b67f55 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -68,6 +68,15 @@ static void __init reserve_crashkernel(void) } #endif +/* + * The main usage of linux,usable-memory-range is for crash dump kernel. + * Originally, the number of usable-memory regions is one. Now there may + * be two regions, low region and high region. + * To make compatibility with existing user-space and older kdump, the low + * region is always the last range of linux,usable-memory-range if exist. 
+ */ +#define MAX_USABLE_RANGES 2 + #ifdef CONFIG_CRASH_DUMP static int __init early_init_dt_scan_elfcorehdr(unsigned long node, const char *uname, int depth, void *data) @@ -201,9 +210,9 @@ early_param("mem", early_mem); static int __init early_init_dt_scan_usablemem(unsigned long node, const char *uname, int depth, void *data) { - struct memblock_region *usablemem = data; - const __be32 *reg; - int len; + struct memblock_region *usable_rgns = data; + const __be32 *reg, *endp; + int len, nr = 0; if (depth != 1 || strcmp(uname, "chosen") != 0) return 0; @@ -212,22 +221,36 @@ static int __init early_init_dt_scan_usablemem(unsigned long node, if (!reg || (len < (dt_root_addr_cells + dt_root_size_cells))) return 1; - usablemem->base = dt_mem_next_cell(dt_root_addr_cells, ); - usablemem->size = dt_mem_next_cell(dt_root_size_cells, ); + endp = reg + (len / sizeof(__be32)); + while ((endp - reg) >= (dt_root_addr_cells + dt_root_size_cells)) { + usable_rgns[nr].base = dt_mem_next_cell(dt_root_addr_cells, ); + usable_rgns[nr].size = dt_mem_next_cell(dt_root_size_cells, ); + + if (++nr >= MAX_USABLE_RANGES) + break; + } return 1; } static void __init fdt_enforce_memory_region(void) { - struct memblock_region reg = { - .size = 0, + struct memblock_region usable_rgns[MAX_USABLE_RANGES] = { + { .size = 0 }, + { .size = 0 } }; - of_scan_flat_dt(early_init_dt_scan_usablemem, ); + of_scan_flat_dt(early_init_dt_scan_usablemem, _rgns); - if (reg.size) - memblock_cap_memory_range(reg.base, reg.size); + /* +* The first range of usable-memory regions is for crash dump +* kernel with only one region or for high region with two regions, +* the second range is dedicated for low region if exist. +*/ + if (usable_rgns[0].size) + memblock_cap_memory_range(usable_rgns[0].base, usable_rgns[0].size); + if (usable_rgns[1].size) + memblock_add(usable_rgns[1].base, usable_rgns[1].size); } void __init arm64_memblock_init(void) -- 2.20.1
[PATCH v14 07/11] arm64: kdump: introduce some macros for crash kernel reservation
Introduce the macro CRASH_ALIGN for alignment, CRASH_ADDR_LOW_MAX for the upper bound of low crash memory, and CRASH_ADDR_HIGH_MAX for the upper bound of high crash memory, and use these macros instead of the hard-coded values. Besides, to keep consistent with x86, use CRASH_ALIGN as the lower bound of the crash kernel reservation. Signed-off-by: Chen Zhou Tested-by: John Donnelly --- arch/arm64/include/asm/kexec.h | 6 ++ arch/arm64/mm/init.c | 6 +++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h index d24b527e8c00..3f6ecae0bc68 100644 --- a/arch/arm64/include/asm/kexec.h +++ b/arch/arm64/include/asm/kexec.h @@ -25,6 +25,12 @@ #define KEXEC_ARCH KEXEC_ARCH_AARCH64 +/* 2M alignment for crash kernel regions */ +#define CRASH_ALIGN SZ_2M + +#define CRASH_ADDR_LOW_MAX arm64_dma_phys_limit +#define CRASH_ADDR_HIGH_MAX MEMBLOCK_ALLOC_ACCESSIBLE + #ifndef __ASSEMBLY__ /** diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 709d98fea90c..912f64f505f7 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -84,8 +84,8 @@ static void __init reserve_crashkernel(void) if (crash_base == 0) { /* Current arm64 boot protocol requires 2MB alignment */ - crash_base = memblock_find_in_range(0, arm64_dma_phys_limit, - crash_size, SZ_2M); + crash_base = memblock_find_in_range(CRASH_ALIGN, CRASH_ADDR_LOW_MAX, + crash_size, CRASH_ALIGN); if (crash_base == 0) { pr_warn("cannot allocate crashkernel (size:0x%llx)\n", crash_size); @@ -103,7 +103,7 @@ static void __init reserve_crashkernel(void) return; } - if (!IS_ALIGNED(crash_base, SZ_2M)) { + if (!IS_ALIGNED(crash_base, CRASH_ALIGN)) { pr_warn("cannot reserve crashkernel: base address is not 2MB aligned\n"); return; } -- 2.20.1
[PATCH v14 01/11] x86: kdump: replace the hard-coded alignment with macro CRASH_ALIGN
Move CRASH_ALIGN to header asm/kexec.h for later use. Besides, the alignment of crash kernel regions in x86 is 16M (CRASH_ALIGN), but function reserve_crashkernel() also used 1M alignment. So just replace hard-coded alignment 1M with macro CRASH_ALIGN. Suggested-by: Dave Young Suggested-by: Baoquan He Signed-off-by: Chen Zhou Tested-by: John Donnelly --- arch/x86/include/asm/kexec.h | 3 +++ arch/x86/kernel/setup.c | 5 + 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h index 6802c59e8252..be18dc7ae51f 100644 --- a/arch/x86/include/asm/kexec.h +++ b/arch/x86/include/asm/kexec.h @@ -18,6 +18,9 @@ # define KEXEC_CONTROL_CODE_MAX_SIZE 2048 +/* 16M alignment for crash kernel regions */ +#define CRASH_ALIGN SZ_16M + #ifndef __ASSEMBLY__ #include diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 3412c4595efd..da769845597d 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -390,9 +390,6 @@ static void __init memblock_x86_reserve_range_setup_data(void) #ifdef CONFIG_KEXEC_CORE -/* 16M alignment for crash kernel regions */ -#define CRASH_ALIGN SZ_16M - /* * Keep the crash kernel below this limit. * @@ -510,7 +507,7 @@ static void __init reserve_crashkernel(void) } else { unsigned long long start; - start = memblock_phys_alloc_range(crash_size, SZ_1M, crash_base, + start = memblock_phys_alloc_range(crash_size, CRASH_ALIGN, crash_base, crash_base + crash_size); if (start != crash_base) { pr_info("crashkernel reservation failed - memory is in use.\n"); -- 2.20.1
[PATCH v14 04/11] x86: kdump: move xen_pv_domain() check and insert_resource() to setup_arch()
We will make the function reserve_crashkernel() generic. The xen_pv_domain() check in reserve_crashkernel() is relevant only to x86, as is insert_resource() in reserve_crashkernel[_low](). So move the xen_pv_domain() check and insert_resource() to setup_arch() to keep them in x86. Suggested-by: Mike Rapoport Signed-off-by: Chen Zhou Tested-by: John Donnelly --- arch/x86/kernel/setup.c | 19 +++ 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 086a04235be4..5d676efc32f6 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -454,7 +454,6 @@ static int __init reserve_crashkernel_low(void) crashk_low_res.start = low_base; crashk_low_res.end = low_base + low_size - 1; - insert_resource(&iomem_resource, &crashk_low_res); #endif return 0; } @@ -478,11 +477,6 @@ static void __init reserve_crashkernel(void) high = true; } - if (xen_pv_domain()) { - pr_info("Ignoring crashkernel for a Xen PV domain\n"); - return; - } - /* 0 means: find the address automatically */ if (!crash_base) { /* @@ -529,7 +523,6 @@ static void __init reserve_crashkernel(void) crashk_res.start = crash_base; crashk_res.end = crash_base + crash_size - 1; - insert_resource(&iomem_resource, &crashk_res); } #else static void __init reserve_crashkernel(void) @@ -1151,7 +1144,17 @@ void __init setup_arch(char **cmdline_p) * Reserve memory for crash kernel after SRAT is parsed so that it * won't consume hotpluggable memory. */ - reserve_crashkernel(); + if (xen_pv_domain()) + pr_info("Ignoring crashkernel for a Xen PV domain\n"); + else { + reserve_crashkernel(); +#ifdef CONFIG_KEXEC_CORE + if (crashk_res.end > crashk_res.start) + insert_resource(&iomem_resource, &crashk_res); + if (crashk_low_res.end > crashk_low_res.start) + insert_resource(&iomem_resource, &crashk_low_res); +#endif + } memblock_find_dma_reserve(); -- 2.20.1
[PATCH v3] cgroup-v1: add disabled controller check in cgroup1_parse_param()
When mounting a cgroup hierarchy with a disabled controller in cgroup v1, all available controllers will be attached. For example, boot with cgroup_no_v1=cpu or cgroup_disable=cpu, and then mount with "mount -t cgroup -ocpu cpu /sys/fs/cgroup/cpu", then all enabled controllers will be attached except cpu. Fix this by adding a disabled-controller check in cgroup1_parse_param(). If the specified controller is disabled, just return an error with the message "Disabled controller xx" rather than attaching all the other enabled controllers. Fixes: f5dfb5315d34 ("cgroup: take options parsing into ->parse_monolithic()") Signed-off-by: Chen Zhou Reviewed-by: Zefan Li --- Changes in v3: - Update the description of commit message. - Add Reviewed-by from Zefan. Changes in v2: - Fix line over 80 characters warning. --- kernel/cgroup/cgroup-v1.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 32596fdbcd5b..a5751784ad74 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -917,6 +917,9 @@ int cgroup1_parse_param(struct fs_context *fc, struct fs_parameter *param) for_each_subsys(ss, i) { if (strcmp(param->key, ss->legacy_name)) continue; + if (!cgroup_ssid_enabled(i) || cgroup1_ssid_disabled(i)) + return invalfc(fc, "Disabled controller '%s'", + param->key); ctx->subsys_mask |= (1 << i); return 0; } -- 2.20.1
[PATCH v2 1/3] arm64: mm: update the comments about ZONE_DMA
Since patchset "arm64: Default to 32-bit wide ZONE_DMA", ZONE_DMA's size is fine-tuned. In the absence of addressing limited masters, ZONE_DMA will span the whole 32-bit address space, otherwise, in the case of the Raspberry Pi 4, it'll only span the 30-bit address space. Update the comments. Signed-off-by: Chen Zhou Reviewed-by: Nicolas Saenz Julienne --- arch/arm64/mm/init.c | 9 + 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 75addb36354a..7b9809e39927 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -53,10 +53,11 @@ s64 memstart_addr __ro_after_init = -1; EXPORT_SYMBOL(memstart_addr); /* - * We create both ZONE_DMA and ZONE_DMA32. ZONE_DMA covers the first 1G of - * memory as some devices, namely the Raspberry Pi 4, have peripherals with - * this limited view of the memory. ZONE_DMA32 will cover the rest of the 32 - * bit addressable memory area. + * We create both ZONE_DMA and ZONE_DMA32. ZONE_DMA's size is fine-tuned. + * In the absence of addressing limited masters, ZONE_DMA will span the + * whole 32-bit address space, otherwise, in the case of the Raspberry Pi 4, + * it'll only span the 30-bit address space. ZONE_DMA32 will cover the rest + * of the 32 bit addressable memory area. */ phys_addr_t arm64_dma_phys_limit __ro_after_init; static phys_addr_t arm64_dma32_phys_limit __ro_after_init; -- 2.20.1
[PATCH v2 0/3] arm64: mm: reserve CMA and crashkernel in ZONE_DMA if enabled
Currently, CMA and crashkernel are reserved in ZONE_DMA32, which is OK for the majority of devices. But the ones that need them in ZONE_DMA need to configure it explicitly. Since the patchset "arm64: Default to 32-bit wide ZONE_DMA", ZONE_DMA's size is fine-tuned. So we can directly reserve CMA and crashkernel in ZONE_DMA if CONFIG_ZONE_DMA is enabled, and in ZONE_DMA32 otherwise. Patch 1 updates the comments about ZONE_DMA. Patch 2 moves dma_contiguous_reserve() to bootmem_init(). Patch 3 reserves CMA and crashkernel in ZONE_DMA if enabled. Changes since v1: - Add Reviewed-by for patch 1 from Nicolas. - Suggested by Nicolas, also reserve CMA in ZONE_DMA if enabled. Chen Zhou (3): arm64: mm: update the comments about ZONE_DMA arm64: mm: move dma_contiguous_reserve() to bootmem_init() arm64: mm: reserve CMA and crashkernel in ZONE_DMA if enabled arch/arm64/mm/init.c | 16 +--- 1 file changed, 9 insertions(+), 7 deletions(-) -- 2.20.1
[PATCH v2 2/3] arm64: mm: move dma_contiguous_reserve() to bootmem_init()
Like crashkernel, CMA might also reserve memory located in ZONE_DMA, so move dma_contiguous_reserve() to bootmem_init() to make sure that arm64_dma_phys_limit is populated. Just place dma_contiguous_reserve() after reserve_crashkernel() as before. Signed-off-by: Chen Zhou Suggested-by: Nicolas Saenz Julienne --- arch/arm64/mm/init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 7b9809e39927..64a0e8f551d6 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -403,8 +403,6 @@ void __init arm64_memblock_init(void) reserve_elfcorehdr(); high_memory = __va(memblock_end_of_DRAM() - 1) + 1; - - dma_contiguous_reserve(arm64_dma32_phys_limit); } void __init bootmem_init(void) @@ -445,6 +443,8 @@ void __init bootmem_init(void) */ reserve_crashkernel(); + dma_contiguous_reserve(arm64_dma32_phys_limit); + memblock_dump_all(); } -- 2.20.1
[PATCH v2 3/3] arm64: mm: reserve CMA and crashkernel in ZONE_DMA if enabled
Currently, CMA and crashkernel are reserved in ZONE_DMA32, which is OK for majority of devices. But the ones that need them in ZONE_DMA need to configure it explicitly. Since patchset "arm64: Default to 32-bit wide ZONE_DMA", ZONE_DMA's size is fine-tuned. So we could directly reserve CMA and crashkernel in ZONE_DMA if CONFIG_ZONE_DMA is enabled, otherwise, reserving in ZONE_DMA32. Signed-off-by: Chen Zhou Suggested-by: Nicolas Saenz Julienne --- arch/arm64/mm/init.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 64a0e8f551d6..26de149b21c7 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -85,7 +85,8 @@ static void __init reserve_crashkernel(void) if (crash_base == 0) { /* Current arm64 boot protocol requires 2MB alignment */ - crash_base = memblock_find_in_range(0, arm64_dma32_phys_limit, + crash_base = memblock_find_in_range(0, + arm64_dma_phys_limit ? : arm64_dma32_phys_limit, crash_size, SZ_2M); if (crash_base == 0) { pr_warn("cannot allocate crashkernel (size:0x%llx)\n", @@ -443,7 +444,7 @@ void __init bootmem_init(void) */ reserve_crashkernel(); - dma_contiguous_reserve(arm64_dma32_phys_limit); + dma_contiguous_reserve(arm64_dma_phys_limit ? : arm64_dma32_phys_limit); memblock_dump_all(); } -- 2.20.1
[PATCH 2/2] arm64: mm: fix kdump broken with ZONE_DMA reintroduced
If the memory reserved for the crash dump kernel falls in ZONE_DMA32, devices in the crash dump kernel that need to use ZONE_DMA will fail to allocate memory. Fix this by reserving low memory in ZONE_DMA if CONFIG_ZONE_DMA is enabled, and in ZONE_DMA32 otherwise. Fixes: bff3b04460a8 ("arm64: mm: reserve CMA and crashkernel in ZONE_DMA32") Signed-off-by: Chen Zhou --- arch/arm64/mm/init.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 7b9809e39927..5074e945f1a6 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -85,7 +85,8 @@ static void __init reserve_crashkernel(void) if (crash_base == 0) { /* Current arm64 boot protocol requires 2MB alignment */ - crash_base = memblock_find_in_range(0, arm64_dma32_phys_limit, + crash_base = memblock_find_in_range(0, + arm64_dma_phys_limit ? : arm64_dma32_phys_limit, crash_size, SZ_2M); if (crash_base == 0) { pr_warn("cannot allocate crashkernel (size:0x%llx)\n", -- 2.20.1
[PATCH 1/2] arm64: mm: update the comments about ZONE_DMA
Since patchset "arm64: Default to 32-bit wide ZONE_DMA", ZONE_DMA's size is fine-tuned. In the absence of addressing limited masters, ZONE_DMA will span the whole 32-bit address space, otherwise, in the case of the Raspberry Pi 4, it'll only span the 30-bit address space. Update the comments. Signed-off-by: Chen Zhou --- arch/arm64/mm/init.c | 9 + 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 75addb36354a..7b9809e39927 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -53,10 +53,11 @@ s64 memstart_addr __ro_after_init = -1; EXPORT_SYMBOL(memstart_addr); /* - * We create both ZONE_DMA and ZONE_DMA32. ZONE_DMA covers the first 1G of - * memory as some devices, namely the Raspberry Pi 4, have peripherals with - * this limited view of the memory. ZONE_DMA32 will cover the rest of the 32 - * bit addressable memory area. + * We create both ZONE_DMA and ZONE_DMA32. ZONE_DMA's size is fine-tuned. + * In the absence of addressing limited masters, ZONE_DMA will span the + * whole 32-bit address space, otherwise, in the case of the Raspberry Pi 4, + * it'll only span the 30-bit address space. ZONE_DMA32 will cover the rest + * of the 32 bit addressable memory area. */ phys_addr_t arm64_dma_phys_limit __ro_after_init; static phys_addr_t arm64_dma32_phys_limit __ro_after_init; -- 2.20.1
[PATCH 0/2] arm64: mm: fix kdump broken with ZONE_DMA reintroduced
If the memory reserved for the crash dump kernel falls in ZONE_DMA32, devices in the crash dump kernel that need to use ZONE_DMA will fail to allocate memory. Fix this by reserving low memory in ZONE_DMA if CONFIG_ZONE_DMA is enabled, and in ZONE_DMA32 otherwise. Patch 1 updates the comments about ZONE_DMA. Patch 2 fixes the broken kdump. Chen Zhou (2): arm64: mm: update the comments about ZONE_DMA arm64: mm: fix kdump broken with ZONE_DMA reintroduced arch/arm64/mm/init.c | 12 +++- 1 file changed, 7 insertions(+), 5 deletions(-) -- 2.20.1
[PATCH v2] cgroup-v1: add disabled controller check in cgroup1_parse_param()
When mounting a cgroup hierarchy with disabled controller in cgroup v1, all available controllers will be attached. Add disabled controller check in cgroup1_parse_param() and return directly if the specified controller is disabled. Signed-off-by: Chen Zhou --- Changes in v2: - Fix line over 80 characters warning. --- kernel/cgroup/cgroup-v1.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 191c329e482a..5190c42fea8b 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -915,6 +915,9 @@ int cgroup1_parse_param(struct fs_context *fc, struct fs_parameter *param) for_each_subsys(ss, i) { if (strcmp(param->key, ss->legacy_name)) continue; + if (!cgroup_ssid_enabled(i) || cgroup1_ssid_disabled(i)) + return invalfc(fc, "Disabled controller '%s'", + param->key); ctx->subsys_mask |= (1 << i); return 0; } -- 2.20.1
[PATCH] drm/msm/dpu: Fix error return code in dpu_mdss_init()
Fix to return a negative error code from the error handling case instead of 0 in function dpu_mdss_init(), as done elsewhere in this function. Fixes: 070e64dc1bbc ("drm/msm/dpu: Convert to a chained irq chip") Reported-by: Hulk Robot Signed-off-by: Chen Zhou --- drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c index cd4078807db1..6e600b4ca995 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c @@ -297,8 +297,10 @@ int dpu_mdss_init(struct drm_device *dev) goto irq_domain_error; irq = platform_get_irq(pdev, 0); - if (irq < 0) + if (irq < 0) { + ret = irq; goto irq_error; + } irq_set_chained_handler_and_data(irq, dpu_mdss_irq, dpu_mdss); -- 2.20.1
[PATCH] KVM: SVM: fix error return code in svm_create_vcpu()
Fix to return a negative error code from the error handling case instead of 0 in function svm_create_vcpu(), as done elsewhere in this function. Fixes: f4c847a95654 ("KVM: SVM: refactor msr permission bitmap allocation") Reported-by: Hulk Robot Signed-off-by: Chen Zhou --- arch/x86/kvm/svm/svm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 1e81cfebd491..79b3a564f1c9 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1309,8 +1309,10 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu) svm->avic_is_running = true; svm->msrpm = svm_vcpu_alloc_msrpm(); - if (!svm->msrpm) + if (!svm->msrpm) { + err = -ENOMEM; goto error_free_vmcb_page; + } svm_vcpu_init_msrpm(vcpu, svm->msrpm); -- 2.20.1
[PATCH v2] usb: gadget: mass_storage: fix error return code in msg_bind()
Fix to return a negative error code from the error handling case instead of 0 in function msg_bind(), as done elsewhere in this function. Fixes: d86788979761 ("usb: gadget: mass_storage: allocate and init otg descriptor by otg capabilities") Reported-by: Hulk Robot Signed-off-by: Chen Zhou --- drivers/usb/gadget/legacy/mass_storage.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/legacy/mass_storage.c b/drivers/usb/gadget/legacy/mass_storage.c index 9ed22c5fb7fe..ac1741126619 100644 --- a/drivers/usb/gadget/legacy/mass_storage.c +++ b/drivers/usb/gadget/legacy/mass_storage.c @@ -175,8 +175,10 @@ static int msg_bind(struct usb_composite_dev *cdev) struct usb_descriptor_header *usb_desc; usb_desc = usb_otg_descriptor_alloc(cdev->gadget); - if (!usb_desc) + if (!usb_desc) { + status = -ENOMEM; goto fail_string_ids; + } usb_otg_descriptor_init(cdev->gadget, usb_desc); otg_desc[0] = usb_desc; otg_desc[1] = NULL; -- 2.20.1
[PATCH v2] selinux: Fix error return code in sel_ib_pkey_sid_slow()
Fix to return a negative error code from the error handling case instead of 0 in function sel_ib_pkey_sid_slow(), as done elsewhere in this function. Fixes: 409dcf31538a ("selinux: Add a cache for quicker retreival of PKey SIDs") Reported-by: Hulk Robot Signed-off-by: Chen Zhou --- security/selinux/ibpkey.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/security/selinux/ibpkey.c b/security/selinux/ibpkey.c index f68a7617cfb9..3a63a989e55e 100644 --- a/security/selinux/ibpkey.c +++ b/security/selinux/ibpkey.c @@ -151,8 +151,10 @@ static int sel_ib_pkey_sid_slow(u64 subnet_prefix, u16 pkey_num, u32 *sid) * is valid, it just won't be added to the cache. */ new = kzalloc(sizeof(*new), GFP_ATOMIC); - if (!new) + if (!new) { + ret = -ENOMEM; goto out; + } new->psec.subnet_prefix = subnet_prefix; new->psec.pkey = pkey_num; -- 2.20.1
[PATCH] RDMA/core: Fix error return code in _ib_modify_qp()
Fix to return a negative error code from the error handling case instead of 0 in function _ib_modify_qp(), as done elsewhere in this function. Fixes: 51aab12631dd ("RDMA/core: Get xmit slave for LAG") Reported-by: Hulk Robot Signed-off-by: Chen Zhou --- drivers/infiniband/core/verbs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 740f8454b6b4..3d895cc41c3a 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1698,8 +1698,10 @@ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, slave = rdma_lag_get_ah_roce_slave(qp->device, >ah_attr, GFP_KERNEL); - if (IS_ERR(slave)) + if (IS_ERR(slave)) { + ret = PTR_ERR(slave); goto out_av; + } attr->xmit_slave = slave; } } -- 2.20.1
[PATCH] selinux: Fix error return code in sel_ib_pkey_sid_slow()
Fix to return a negative error code from the error handling case instead of 0 in function sel_ib_pkey_sid_slow(), as done elsewhere in this function. Fixes: 409dcf31538a ("selinux: Add a cache for quicker retreival of PKey SIDs") Reported-by: Hulk Robot Signed-off-by: Chen Zhou --- security/selinux/ibpkey.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/security/selinux/ibpkey.c b/security/selinux/ibpkey.c index f68a7617cfb9..680b2dd1520f 100644 --- a/security/selinux/ibpkey.c +++ b/security/selinux/ibpkey.c @@ -151,8 +151,10 @@ static int sel_ib_pkey_sid_slow(u64 subnet_prefix, u16 pkey_num, u32 *sid) * is valid, it just won't be added to the cache. */ new = kzalloc(sizeof(*new), GFP_ATOMIC); - if (!new) + if (IS_ERR(new)) { + ret = PTR_ERR(new); goto out; + } new->psec.subnet_prefix = subnet_prefix; new->psec.pkey = pkey_num; -- 2.20.1
[PATCH] usb: gadget: mass_storage: fix error return code in msg_bind()
Fix to return a negative error code from the error handling case instead of 0 in function msg_bind(), as done elsewhere in this function. Fixes: d86788979761 ("usb: gadget: mass_storage: allocate and init otg descriptor by otg capabilities") Reported-by: Hulk Robot Signed-off-by: Chen Zhou --- drivers/usb/gadget/legacy/mass_storage.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/legacy/mass_storage.c b/drivers/usb/gadget/legacy/mass_storage.c index 9ed22c5fb7fe..7a88c5282d61 100644 --- a/drivers/usb/gadget/legacy/mass_storage.c +++ b/drivers/usb/gadget/legacy/mass_storage.c @@ -175,8 +175,10 @@ static int msg_bind(struct usb_composite_dev *cdev) struct usb_descriptor_header *usb_desc; usb_desc = usb_otg_descriptor_alloc(cdev->gadget); - if (!usb_desc) + if (IS_ERR(usb_desc)) { + status = PTR_ERR(usb_desc); goto fail_string_ids; + } usb_otg_descriptor_init(cdev->gadget, usb_desc); otg_desc[0] = usb_desc; otg_desc[1] = NULL; -- 2.20.1
[PATCH v13 4/8] x86: kdump: move reserve_crashkernel[_low]() into crash_core.c
Make the functions reserve_crashkernel[_low]() as generic. Arm64 will use these to reimplement crashkernel=X. Signed-off-by: Chen Zhou Tested-by: John Donnelly --- arch/x86/include/asm/kexec.h | 25 ++ arch/x86/kernel/setup.c | 151 +--- include/linux/crash_core.h | 4 + include/linux/kexec.h| 2 - kernel/crash_core.c | 164 +++ kernel/kexec_core.c | 17 6 files changed, 195 insertions(+), 168 deletions(-) diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h index 8cf9d3fd31c7..34afa7b645f9 100644 --- a/arch/x86/include/asm/kexec.h +++ b/arch/x86/include/asm/kexec.h @@ -21,6 +21,27 @@ /* 2M alignment for crash kernel regions */ #define CRASH_ALIGNSZ_16M +/* + * Keep the crash kernel below this limit. + * + * Earlier 32-bits kernels would limit the kernel to the low 512 MB range + * due to mapping restrictions. + * + * 64-bit kdump kernels need to be restricted to be under 64 TB, which is + * the upper limit of system RAM in 4-level paging mode. Since the kdump + * jump could be from 5-level paging to 4-level paging, the jump will fail if + * the kernel is put above 64 TB, and during the 1st kernel bootup there's + * no good way to detect the paging mode of the target kernel which will be + * loaded for dumping. 
+ */ +#ifdef CONFIG_X86_32 +# define CRASH_ADDR_LOW_MAXSZ_512M +# define CRASH_ADDR_HIGH_MAX SZ_512M +#else +# define CRASH_ADDR_LOW_MAXSZ_4G +# define CRASH_ADDR_HIGH_MAX SZ_64T +#endif + #ifndef __ASSEMBLY__ #include @@ -200,6 +221,10 @@ typedef void crash_vmclear_fn(void); extern crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss; extern void kdump_nmi_shootdown_cpus(void); +#ifdef CONFIG_KEXEC_CORE +extern void __init reserve_crashkernel(void); +#endif + #endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_KEXEC_H */ diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 1289f079ad5f..00b3840d30f9 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -25,8 +25,6 @@ #include -#include - #include #include #include @@ -38,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -389,153 +388,7 @@ static void __init memblock_x86_reserve_range_setup_data(void) } } -/* - * - Crashkernel reservation -- - */ - -#ifdef CONFIG_KEXEC_CORE - -/* - * Keep the crash kernel below this limit. - * - * Earlier 32-bits kernels would limit the kernel to the low 512 MB range - * due to mapping restrictions. - * - * 64-bit kdump kernels need to be restricted to be under 64 TB, which is - * the upper limit of system RAM in 4-level paging mode. Since the kdump - * jump could be from 5-level paging to 4-level paging, the jump will fail if - * the kernel is put above 64 TB, and during the 1st kernel bootup there's - * no good way to detect the paging mode of the target kernel which will be - * loaded for dumping. 
- */ -#ifdef CONFIG_X86_32 -# define CRASH_ADDR_LOW_MAXSZ_512M -# define CRASH_ADDR_HIGH_MAX SZ_512M -#else -# define CRASH_ADDR_LOW_MAXSZ_4G -# define CRASH_ADDR_HIGH_MAX SZ_64T -#endif - -static int __init reserve_crashkernel_low(void) -{ -#ifdef CONFIG_X86_64 - unsigned long long base, low_base = 0, low_size = 0; - unsigned long low_mem_limit; - int ret; - - low_mem_limit = min(memblock_phys_mem_size(), CRASH_ADDR_LOW_MAX); - - /* crashkernel=Y,low */ - ret = parse_crashkernel_low(boot_command_line, low_mem_limit, _size, ); - if (ret) { - /* -* two parts from kernel/dma/swiotlb.c: -* -swiotlb size: user-specified with swiotlb= or default. -* -* -swiotlb overflow buffer: now hardcoded to 32k. We round it -* to 8M for other buffers that may need to stay low too. Also -* make sure we allocate enough extra low memory so that we -* don't run out of DMA buffers for 32-bit devices. -*/ - low_size = max(swiotlb_size_or_default() + (8UL << 20), 256UL << 20); - } else { - /* passed with crashkernel=0,low ? */ - if (!low_size) - return 0; - } - - low_base = memblock_phys_alloc_range(low_size, CRASH_ALIGN, CRASH_ALIGN, CRASH_ADDR_LOW_MAX); - if (!low_base) { - pr_err("Cannot reserve %ldMB crashkernel low memory, please try smaller size.\n", - (unsigned long)(low_size >> 20)); - return -ENOMEM; - } - - pr_info("Reserving %ldMB of low memory at %ldMB for crashkernel (low RAM limit: %ldMB)\n", - (unsigned long)(low_size >> 20), - (unsigned long)(low_base >> 20), - (unsigned long)(low_mem_limit
[PATCH v13 5/8] arm64: kdump: introduce some macroes for crash kernel reservation
Introduce macro CRASH_ALIGN for alignment, macro CRASH_ADDR_LOW_MAX for upper bound of low crash memory, macro CRASH_ADDR_HIGH_MAX for upper bound of high crash memory, use macroes instead. Besides, keep consistent with x86, use CRASH_ALIGN as the lower bound of crash kernel reservation. Signed-off-by: Chen Zhou Tested-by: John Donnelly --- arch/arm64/include/asm/kexec.h | 6 ++ arch/arm64/include/asm/processor.h | 1 + arch/arm64/mm/init.c | 8 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h index d24b527e8c00..402d208265a3 100644 --- a/arch/arm64/include/asm/kexec.h +++ b/arch/arm64/include/asm/kexec.h @@ -25,6 +25,12 @@ #define KEXEC_ARCH KEXEC_ARCH_AARCH64 +/* 2M alignment for crash kernel regions */ +#define CRASH_ALIGNSZ_2M + +#define CRASH_ADDR_LOW_MAX arm64_dma32_phys_limit +#define CRASH_ADDR_HIGH_MAXMEMBLOCK_ALLOC_ACCESSIBLE + #ifndef __ASSEMBLY__ /** diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index fce8cbecd6bc..12131655cab7 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -96,6 +96,7 @@ #endif /* CONFIG_ARM64_FORCE_52BIT */ extern phys_addr_t arm64_dma_phys_limit; +extern phys_addr_t arm64_dma32_phys_limit; #define ARCH_LOW_ADDRESS_LIMIT (arm64_dma_phys_limit - 1) struct debug_info { diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 095540667f0f..a07fd8e1f926 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -60,7 +60,7 @@ EXPORT_SYMBOL(memstart_addr); * bit addressable memory area. 
*/ phys_addr_t arm64_dma_phys_limit __ro_after_init; -static phys_addr_t arm64_dma32_phys_limit __ro_after_init; +phys_addr_t arm64_dma32_phys_limit __ro_after_init; #ifdef CONFIG_KEXEC_CORE /* @@ -85,8 +85,8 @@ static void __init reserve_crashkernel(void) if (crash_base == 0) { /* Current arm64 boot protocol requires 2MB alignment */ - crash_base = memblock_find_in_range(0, arm64_dma32_phys_limit, - crash_size, SZ_2M); + crash_base = memblock_find_in_range(CRASH_ALIGN, CRASH_ADDR_LOW_MAX, + crash_size, CRASH_ALIGN); if (crash_base == 0) { pr_warn("cannot allocate crashkernel (size:0x%llx)\n", crash_size); @@ -104,7 +104,7 @@ static void __init reserve_crashkernel(void) return; } - if (!IS_ALIGNED(crash_base, SZ_2M)) { + if (!IS_ALIGNED(crash_base, CRASH_ALIGN)) { pr_warn("cannot reserve crashkernel: base address is not 2MB aligned\n"); return; } -- 2.20.1
[PATCH v13 6/8] arm64: kdump: reimplement crashkernel=X
There are the following issues in arm64 kdump: 1. We use crashkernel=X to reserve crashkernel below 4G, which will fail when there is not enough low memory. 2. If the crashkernel is reserved above 4G, the crash dump kernel will fail to boot because there is no low memory available for allocation. 3. Since commit 1a8e1cef7603 ("arm64: use both ZONE_DMA and ZONE_DMA32"), if the memory reserved for the crash dump kernel falls in ZONE_DMA32, devices in the crash dump kernel that need to use ZONE_DMA will fail to allocate memory. To solve these issues, change the behavior of crashkernel=X and introduce crashkernel=X,[high,low]. crashkernel=X tries low allocation in the DMA zone (or the DMA32 zone if CONFIG_ZONE_DMA is disabled), and falls back to high allocation if it fails. We can also use "crashkernel=X,high" to select a region above the DMA zone, which also tries to allocate at least 256M in the DMA zone automatically (or the DMA32 zone if CONFIG_ZONE_DMA is disabled). "crashkernel=Y,low" can be used to allocate low memory of the specified size. Another minor change: there may be two regions reserved for the crash dump kernel, so in order to distinguish the low region from the high region without affecting the use of existing kexec-tools, rename the low region as "Crash kernel (low)". 
Signed-off-by: Chen Zhou Tested-by: John Donnelly --- arch/arm64/include/asm/kexec.h | 9 + arch/arm64/kernel/setup.c | 13 +++- arch/arm64/mm/init.c | 60 ++ arch/arm64/mm/mmu.c| 4 +++ kernel/crash_core.c| 8 +++-- 5 files changed, 34 insertions(+), 60 deletions(-) diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h index 402d208265a3..79909ae5e22e 100644 --- a/arch/arm64/include/asm/kexec.h +++ b/arch/arm64/include/asm/kexec.h @@ -28,7 +28,12 @@ /* 2M alignment for crash kernel regions */ #define CRASH_ALIGNSZ_2M +#ifdef CONFIG_ZONE_DMA +#define CRASH_ADDR_LOW_MAX arm64_dma_phys_limit +#else #define CRASH_ADDR_LOW_MAX arm64_dma32_phys_limit +#endif + #define CRASH_ADDR_HIGH_MAXMEMBLOCK_ALLOC_ACCESSIBLE #ifndef __ASSEMBLY__ @@ -96,6 +101,10 @@ static inline void crash_prepare_suspend(void) {} static inline void crash_post_resume(void) {} #endif +#ifdef CONFIG_KEXEC_CORE +extern void __init reserve_crashkernel(void); +#endif + #ifdef CONFIG_KEXEC_FILE #define ARCH_HAS_KIMAGE_ARCH diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 133257ffd859..6aff30de8f47 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -238,7 +238,18 @@ static void __init request_standard_resources(void) kernel_data.end <= res->end) request_resource(res, _data); #ifdef CONFIG_KEXEC_CORE - /* Userspace will find "Crash kernel" region in /proc/iomem. */ + /* +* Userspace will find "Crash kernel" or "Crash kernel (low)" +* region in /proc/iomem. +* In order to distinct from the high region and make no effect +* to the use of existing kexec-tools, rename the low region as +* "Crash kernel (low)". 
+*/ + if (crashk_low_res.end && crashk_low_res.start >= res->start && + crashk_low_res.end <= res->end) { + crashk_low_res.name = "Crash kernel (low)"; + request_resource(res, _low_res); + } if (crashk_res.end && crashk_res.start >= res->start && crashk_res.end <= res->end) request_resource(res, _res); diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index a07fd8e1f926..888c4f7eadc3 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -62,66 +63,11 @@ EXPORT_SYMBOL(memstart_addr); phys_addr_t arm64_dma_phys_limit __ro_after_init; phys_addr_t arm64_dma32_phys_limit __ro_after_init; -#ifdef CONFIG_KEXEC_CORE -/* - * reserve_crashkernel() - reserves memory for crash kernel - * - * This function reserves memory area given in "crashkernel=" kernel command - * line parameter. The memory reserved is used by dump capture kernel when - * primary kernel is crashing. - */ -static void __init reserve_crashkernel(void) -{ - unsigned long long crash_base, crash_size; - int ret; - - ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), - _size, _base); - /* no crashkernel= or invalid value specified */ - if (ret || !crash_size) - return; - - crash_size = PAGE_ALIGN(crash_size); - - if (crash_base == 0) { - /* Current arm64 boot protocol requi
[PATCH v13 2/8] x86: kdump: make the lower bound of crash kernel reservation consistent
The lower bounds of crash kernel reservation and crash kernel low reservation are different, use the consistent value CRASH_ALIGN. Suggested-by: Dave Young Signed-off-by: Chen Zhou Tested-by: John Donnelly --- arch/x86/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index bf373422dc8a..d1599449a001 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -444,7 +444,7 @@ static int __init reserve_crashkernel_low(void) return 0; } - low_base = memblock_phys_alloc_range(low_size, CRASH_ALIGN, 0, CRASH_ADDR_LOW_MAX); + low_base = memblock_phys_alloc_range(low_size, CRASH_ALIGN, CRASH_ALIGN, CRASH_ADDR_LOW_MAX); if (!low_base) { pr_err("Cannot reserve %ldMB crashkernel low memory, please try smaller size.\n", (unsigned long)(low_size >> 20)); -- 2.20.1
[PATCH v13 0/8] support reserving crashkernel above 4G on arm64 kdump
i added in v1 and implement that in fdt_enforce_memory_region(). There are at most two crash kernel regions, for two crash kernel regions case, we cap the memory range [min(regs[*].start), max(regs[*].end)] and then remove the memory range in the middle. [1]: http://lists.infradead.org/pipermail/kexec/2020-June/020737.html [2]: https://github.com/robherring/dt-schema/pull/19 [v1]: https://lkml.org/lkml/2019/4/2/1174 [v2]: https://lkml.org/lkml/2019/4/9/86 [v3]: https://lkml.org/lkml/2019/4/9/306 [v4]: https://lkml.org/lkml/2019/4/15/273 [v5]: https://lkml.org/lkml/2019/5/6/1360 [v6]: https://lkml.org/lkml/2019/8/30/142 [v7]: https://lkml.org/lkml/2019/12/23/411 [v8]: https://lkml.org/lkml/2020/5/21/213 [v9]: https://lkml.org/lkml/2020/6/28/73 [v10]: https://lkml.org/lkml/2020/7/2/1443 [v11]: https://lkml.org/lkml/2020/8/1/150 [v12]: https://lkml.org/lkml/2020/9/7/1037 Chen Zhou (8): x86: kdump: replace the hard-coded alignment with macro CRASH_ALIGN x86: kdump: make the lower bound of crash kernel reservation consistent x86: kdump: use macro CRASH_ADDR_LOW_MAX in functions reserve_crashkernel() x86: kdump: move reserve_crashkernel[_low]() into crash_core.c arm64: kdump: introduce some macroes for crash kernel reservation arm64: kdump: reimplement crashkernel=X arm64: kdump: add memory for devices by DT property linux,usable-memory-range kdump: update Documentation about crashkernel Documentation/admin-guide/kdump/kdump.rst | 23 ++- .../admin-guide/kernel-parameters.txt | 12 +- arch/arm64/include/asm/kexec.h| 15 ++ arch/arm64/include/asm/processor.h| 1 + arch/arm64/kernel/setup.c | 13 +- arch/arm64/mm/init.c | 105 --- arch/arm64/mm/mmu.c | 4 + arch/x86/include/asm/kexec.h | 28 +++ arch/x86/kernel/setup.c | 153 +--- include/linux/crash_core.h| 4 + include/linux/kexec.h | 2 - kernel/crash_core.c | 168 ++ kernel/kexec_core.c | 17 -- 13 files changed, 301 insertions(+), 244 deletions(-) -- 2.20.1
[PATCH v13 7/8] arm64: kdump: add memory for devices by DT property linux,usable-memory-range
When reserving crashkernel in high memory, some low memory is reserved for crash dump kernel devices and never mapped by the first kernel. This memory range is advertised to crash dump kernel via DT property under /chosen, linux,usable-memory-range = We reused the DT property linux,usable-memory-range and made the low memory region as the second range "BASE2 SIZE2", which keeps compatibility with existing user-space and older kdump kernels. Crash dump kernel reads this property at boot time and call memblock_add() to add the low memory region after memblock_cap_memory_range() has been called. Signed-off-by: Chen Zhou Tested-by: John Donnelly --- arch/arm64/mm/init.c | 43 +-- 1 file changed, 33 insertions(+), 10 deletions(-) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 888c4f7eadc3..794f992cb200 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -69,6 +69,15 @@ static void __init reserve_crashkernel(void) } #endif +/* + * The main usage of linux,usable-memory-range is for crash dump kernel. + * Originally, the number of usable-memory regions is one. Now there may + * be two regions, low region and high region. + * To make compatibility with existing user-space and older kdump, the low + * region is always the last range of linux,usable-memory-range if exist. 
+ */ +#define MAX_USABLE_RANGES 2 + #ifdef CONFIG_CRASH_DUMP static int __init early_init_dt_scan_elfcorehdr(unsigned long node, const char *uname, int depth, void *data) @@ -184,9 +193,9 @@ early_param("mem", early_mem); static int __init early_init_dt_scan_usablemem(unsigned long node, const char *uname, int depth, void *data) { - struct memblock_region *usablemem = data; - const __be32 *reg; - int len; + struct memblock_region *usable_rgns = data; + const __be32 *reg, *endp; + int len, nr = 0; if (depth != 1 || strcmp(uname, "chosen") != 0) return 0; @@ -195,22 +204,36 @@ static int __init early_init_dt_scan_usablemem(unsigned long node, if (!reg || (len < (dt_root_addr_cells + dt_root_size_cells))) return 1; - usablemem->base = dt_mem_next_cell(dt_root_addr_cells, ); - usablemem->size = dt_mem_next_cell(dt_root_size_cells, ); + endp = reg + (len / sizeof(__be32)); + while ((endp - reg) >= (dt_root_addr_cells + dt_root_size_cells)) { + usable_rgns[nr].base = dt_mem_next_cell(dt_root_addr_cells, ); + usable_rgns[nr].size = dt_mem_next_cell(dt_root_size_cells, ); + + if (++nr >= MAX_USABLE_RANGES) + break; + } return 1; } static void __init fdt_enforce_memory_region(void) { - struct memblock_region reg = { - .size = 0, + struct memblock_region usable_rgns[MAX_USABLE_RANGES] = { + { .size = 0 }, + { .size = 0 } }; - of_scan_flat_dt(early_init_dt_scan_usablemem, ); + of_scan_flat_dt(early_init_dt_scan_usablemem, _rgns); - if (reg.size) - memblock_cap_memory_range(reg.base, reg.size); + /* +* The first range of usable-memory regions is for crash dump +* kernel with only one region or for high region with two regions, +* the second range is dedicated for low region if exist. +*/ + if (usable_rgns[0].size) + memblock_cap_memory_range(usable_rgns[0].base, usable_rgns[0].size); + if (usable_rgns[1].size) + memblock_add(usable_rgns[1].base, usable_rgns[1].size); } void __init arm64_memblock_init(void) -- 2.20.1
[PATCH v13 3/8] x86: kdump: use macro CRASH_ADDR_LOW_MAX in functions reserve_crashkernel()
To make the function reserve_crashkernel() generic, replace some hard-coded numbers with the macro CRASH_ADDR_LOW_MAX. Signed-off-by: Chen Zhou Tested-by: John Donnelly --- arch/x86/kernel/setup.c | 7 --- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index d1599449a001..1289f079ad5f 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -491,8 +491,9 @@ static void __init reserve_crashkernel(void) if (!crash_base) { /* * Set CRASH_ADDR_LOW_MAX upper bound for crash memory, -* crashkernel=x,high reserves memory over 4G, also allocates -* 256M extra low memory for DMA buffers and swiotlb. +* crashkernel=x,high reserves memory over CRASH_ADDR_LOW_MAX, +* also allocates 256M extra low memory for DMA buffers +* and swiotlb. * But the extra memory is not required for all machines. * So try low memory first and fall back to high memory * unless "crashkernel=size[KMG],high" is specified. @@ -520,7 +521,7 @@ static void __init reserve_crashkernel(void) } } - if (crash_base >= (1ULL << 32) && reserve_crashkernel_low()) { + if (crash_base >= CRASH_ADDR_LOW_MAX && reserve_crashkernel_low()) { memblock_free(crash_base, crash_size); return; } -- 2.20.1
[PATCH v13 1/8] x86: kdump: replace the hard-coded alignment with macro CRASH_ALIGN
Move CRASH_ALIGN to the header asm/kexec.h and replace the hard-coded alignment with the macro CRASH_ALIGN in function reserve_crashkernel(). Suggested-by: Dave Young Signed-off-by: Chen Zhou Tested-by: John Donnelly --- arch/x86/include/asm/kexec.h | 3 +++ arch/x86/kernel/setup.c | 5 + 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h index 6802c59e8252..8cf9d3fd31c7 100644 --- a/arch/x86/include/asm/kexec.h +++ b/arch/x86/include/asm/kexec.h @@ -18,6 +18,9 @@ # define KEXEC_CONTROL_CODE_MAX_SIZE 2048 +/* 16M alignment for crash kernel regions */ +#define CRASH_ALIGNSZ_16M + #ifndef __ASSEMBLY__ #include diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 84f581c91db4..bf373422dc8a 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -395,9 +395,6 @@ static void __init memblock_x86_reserve_range_setup_data(void) #ifdef CONFIG_KEXEC_CORE -/* 16M alignment for crash kernel regions */ -#define CRASH_ALIGNSZ_16M - /* * Keep the crash kernel below this limit. * @@ -515,7 +512,7 @@ static void __init reserve_crashkernel(void) } else { unsigned long long start; - start = memblock_phys_alloc_range(crash_size, SZ_1M, crash_base, + start = memblock_phys_alloc_range(crash_size, CRASH_ALIGN, crash_base, crash_base + crash_size); if (start != crash_base) { pr_info("crashkernel reservation failed - memory is in use.\n"); -- 2.20.1
[PATCH v13 8/8] kdump: update Documentation about crashkernel
For arm64, the behavior of crashkernel=X has been changed, which tries low allocation in DMA zone or DMA32 zone if CONFIG_ZONE_DMA is disabled, and fall back to high allocation if it fails. We can also use "crashkernel=X,high" to select a high region above DMA zone, which also tries to allocate at least 256M low memory in DMA zone automatically (or the DMA32 zone if CONFIG_ZONE_DMA is disabled). "crashkernel=Y,low" can be used to allocate specified size low memory. So update the Documentation. Signed-off-by: Chen Zhou Tested-by: John Donnelly --- Documentation/admin-guide/kdump/kdump.rst | 23 --- .../admin-guide/kernel-parameters.txt | 12 -- 2 files changed, 30 insertions(+), 5 deletions(-) diff --git a/Documentation/admin-guide/kdump/kdump.rst b/Documentation/admin-guide/kdump/kdump.rst index 75a9dd98e76e..bde5f994d185 100644 --- a/Documentation/admin-guide/kdump/kdump.rst +++ b/Documentation/admin-guide/kdump/kdump.rst @@ -299,7 +299,16 @@ Boot into System Kernel "crashkernel=64M@16M" tells the system kernel to reserve 64 MB of memory starting at physical address 0x0100 (16MB) for the dump-capture kernel. - On x86 and x86_64, use "crashkernel=64M@16M". + On x86 use "crashkernel=64M@16M". + + On x86_64, use "crashkernel=X" to select a region under 4G first, and + fall back to reserve region above 4G. And go for high allocation + directly if the required size is too large. + We can also use "crashkernel=X,high" to select a region above 4G, which + also tries to allocate at least 256M below 4G automatically and + "crashkernel=Y,low" can be used to allocate specified size low memory. + Use "crashkernel=Y@X" if you really have to reserve memory from specified + start address X. On ppc64, use "crashkernel=128M@32M". @@ -316,8 +325,16 @@ Boot into System Kernel kernel will automatically locate the crash kernel image within the first 512MB of RAM if X is not given. - On arm64, use "crashkernel=Y[@X]". 
Note that the start address of - the kernel, X if explicitly specified, must be aligned to 2MiB (0x20). + On arm64, use "crashkernel=X" to try low allocation in DMA zone (or + DMA32 zone if CONFIG_ZONE_DMA is disabled), and fall back to high + allocation if it fails. + We can also use "crashkernel=X,high" to select a high region above + DMA zone, which also tries to allocate at least 256M low memory in + DMA zone automatically (or the DMA32 zone if CONFIG_ZONE_DMA is disabled). + "crashkernel=Y,low" can be used to allocate specified size low memory. + Use "crashkernel=Y@X" if you really have to reserve memory from + specified start address X. Note that the start address of the kernel, + X if explicitly specified, must be aligned to 2MiB (0x20). Load the Dump-capture Kernel diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 526d65d8573a..b2955d9379e8 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -738,6 +738,9 @@ [KNL, X86-64] Select a region under 4G first, and fall back to reserve region above 4G when '@offset' hasn't been specified. + [KNL, arm64] Try low allocation in DMA zone (or DMA32 zone + if CONFIG_ZONE_DMA is disabled), fall back to high allocation + if it fails when '@offset' hasn't been specified. See Documentation/admin-guide/kdump/kdump.rst for further details. crashkernel=range1:size1[,range2:size2,...][@offset] @@ -754,6 +757,8 @@ Otherwise memory region will be allocated below 4G, if available. It will be ignored if crashkernel=X is specified. + [KNL, arm64] range in high memory. + Allow kernel to allocate physical memory region from top. crashkernel=size[KMG],low [KNL, X86-64] range under 4G. 
When crashkernel=X,high is passed, kernel could allocate physical memory region @@ -762,13 +767,16 @@ requires at least 64M+32K low memory, also enough extra low memory is needed to make sure DMA buffers for 32-bit devices won't run out. Kernel would try to allocate at - at least 256M below 4G automatically. + least 256M below 4G automatically. This one let user to specify own low range under 4G for second kernel instead. 0: to disable
[PATCH -next] drm/amd/display: Remove duplicate include
Remove duplicate header which is included twice. Signed-off-by: Chen Zhou --- drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c index a5d750ed569e..65dc5dcd4eb8 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c @@ -35,7 +35,6 @@ #include "dcn30_dpp.h" #include "dcn10/dcn10_cm_common.h" #include "dcn30_cm_common.h" -#include "clk_mgr.h" #include "reg_helper.h" #include "abm.h" #include "clk_mgr.h" -- 2.17.1
[PATCH v12 2/9] x86: kdump: make the lower bound of crash kernel reservation consistent
The lower bounds of the crash kernel reservation and the crash kernel low reservation are different; use the consistent value CRASH_ALIGN for both. Suggested-by: Dave Young Signed-off-by: Chen Zhou --- arch/x86/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 296294ad0dd8..d7fd90c52dae 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -451,7 +451,7 @@ static int __init reserve_crashkernel_low(void) return 0; } - low_base = memblock_find_in_range(0, 1ULL << 32, low_size, CRASH_ALIGN); + low_base = memblock_find_in_range(CRASH_ALIGN, 1ULL << 32, low_size, CRASH_ALIGN); if (!low_base) { pr_err("Cannot reserve %ldMB crashkernel low memory, please try smaller size.\n", (unsigned long)(low_size >> 20)); -- 2.20.1
[PATCH v12 9/9] kdump: update Documentation about crashkernel
For arm64, the behavior of crashkernel=X has been changed, which tries low allocation in DMA zone, and fall back to high allocation if it fails. We can also use "crashkernel=X,high" to select a high region above DMA zone, which also tries to allocate at least 256M low memory in DMA zone automatically. "crashkernel=Y,low" can be used to allocate specified size low memory in DMA zone. For non-RPi4 platforms, change DMA zone memtioned above to DMA32 zone. For x86 and arm64, we introduce threshold for the required memory. if required size X is too large and leads to very little free low memory after low allocation, the system may not work well. So add a threshold and go for high allocation directly if the required size is too large. The threshold is set as the half of low memory. So update the Documentation. Signed-off-by: Chen Zhou --- Documentation/admin-guide/kdump/kdump.rst | 25 --- .../admin-guide/kernel-parameters.txt | 13 -- 2 files changed, 33 insertions(+), 5 deletions(-) diff --git a/Documentation/admin-guide/kdump/kdump.rst b/Documentation/admin-guide/kdump/kdump.rst index 2da65fef2a1c..549611abc581 100644 --- a/Documentation/admin-guide/kdump/kdump.rst +++ b/Documentation/admin-guide/kdump/kdump.rst @@ -299,7 +299,16 @@ Boot into System Kernel "crashkernel=64M@16M" tells the system kernel to reserve 64 MB of memory starting at physical address 0x0100 (16MB) for the dump-capture kernel. - On x86 and x86_64, use "crashkernel=64M@16M". + On x86 use "crashkernel=64M@16M". + + On x86_64, use "crashkernel=X" to select a region under 4G first, and + fall back to reserve region above 4G. And go for high allocation + directly if the required size is too large. + We can also use "crashkernel=X,high" to select a region above 4G, which + also tries to allocate at least 256M below 4G automatically and + "crashkernel=Y,low" can be used to allocate specified size low memory. + Use "crashkernel=Y@X" if you really have to reserve memory from specified + start address X. 
On ppc64, use "crashkernel=128M@32M". @@ -316,8 +325,18 @@ Boot into System Kernel kernel will automatically locate the crash kernel image within the first 512MB of RAM if X is not given. - On arm64, use "crashkernel=Y[@X]". Note that the start address of - the kernel, X if explicitly specified, must be aligned to 2MiB (0x20). + On arm64, use "crashkernel=X" to try low allocation in DMA zone, and + fall back to high allocation if it fails. And go for high allocation + directly if the required size is too large. + We can also use "crashkernel=X,high" to select a high region above + DMA zone, which also tries to allocate at least 256M low memory in + DMA zone automatically. + "crashkernel=Y,low" can be used to allocate specified size low memory + in DMA zone. + For non-RPi4 platforms, change DMA zone memtioned above to DMA32 zone. + Use "crashkernel=Y@X" if you really have to reserve memory from + specified start address X. Note that the start address of the kernel, + X if explicitly specified, must be aligned to 2MiB (0x20). Load the Dump-capture Kernel diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index a1068742a6df..f7df572d8f64 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -727,6 +727,10 @@ [KNL, X86-64] Select a region under 4G first, and fall back to reserve region above 4G when '@offset' hasn't been specified. + [KNL, arm64] Try low allocation in DMA zone, fall back + to high allocation if it fails when '@offset' hasn't been + specified. For non-RPi4 platforms, change DMA zone to + DMA32 zone. See Documentation/admin-guide/kdump/kdump.rst for further details. crashkernel=range1:size1[,range2:size2,...][@offset] @@ -743,6 +747,8 @@ Otherwise memory region will be allocated below 4G, if available. It will be ignored if crashkernel=X is specified. + [KNL, arm64] range in high memory. 
+ Allow kernel to allocate physical memory region from top. crashkernel=size[KMG],low [KNL, X86-64] range under 4G. When crashkernel=X,high is passed, kernel could allocate physical memory region @@ -751,13 +757,16 @@ requires at least 64M+32K low memory, also enough extra low memory is needed to make sur
[PATCH v12 1/9] x86: kdump: move CRASH_ALIGN to 2M
CONFIG_PHYSICAL_ALIGN can be selected from 2M to 16M and the default value is 2M, so move CRASH_ALIGN to 2M; with a smaller alignment, the reservation has a better chance of succeeding. Also replace the hard-coded alignment with the macro CRASH_ALIGN in function reserve_crashkernel(). Suggested-by: Dave Young Signed-off-by: Chen Zhou --- arch/x86/include/asm/kexec.h | 3 +++ arch/x86/kernel/setup.c | 5 + 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h index 6802c59e8252..83f200dd54a1 100644 --- a/arch/x86/include/asm/kexec.h +++ b/arch/x86/include/asm/kexec.h @@ -18,6 +18,9 @@ # define KEXEC_CONTROL_CODE_MAX_SIZE 2048 +/* 2M alignment for crash kernel regions */ +#define CRASH_ALIGNSZ_2M + #ifndef __ASSEMBLY__ #include diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 3511736fbc74..296294ad0dd8 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -402,9 +402,6 @@ static void __init memblock_x86_reserve_range_setup_data(void) #ifdef CONFIG_KEXEC_CORE -/* 16M alignment for crash kernel regions */ -#define CRASH_ALIGNSZ_16M - /* * Keep the crash kernel below this limit. * @@ -530,7 +527,7 @@ static void __init reserve_crashkernel(void) start = memblock_find_in_range(crash_base, crash_base + crash_size, - crash_size, 1 << 20); + crash_size, CRASH_ALIGN); if (start != crash_base) { pr_info("crashkernel reservation failed - memory is in use.\n"); return; -- 2.20.1
[PATCH v12 4/9] x86: kdump: move reserve_crashkernel[_low]() into crash_core.c
Make the functions reserve_crashkernel[_low]() as generic. Arm64 will use these to reimplement crashkernel=X. There is no functional change. Signed-off-by: Chen Zhou --- arch/x86/include/asm/kexec.h | 25 + arch/x86/kernel/setup.c | 163 +-- include/linux/crash_core.h | 4 + include/linux/kexec.h| 2 - kernel/crash_core.c | 179 +++ kernel/kexec_core.c | 17 6 files changed, 210 insertions(+), 180 deletions(-) diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h index 83f200dd54a1..adf5e9a016bd 100644 --- a/arch/x86/include/asm/kexec.h +++ b/arch/x86/include/asm/kexec.h @@ -21,6 +21,27 @@ /* 2M alignment for crash kernel regions */ #define CRASH_ALIGNSZ_2M +/* + * Keep the crash kernel below this limit. + * + * Earlier 32-bits kernels would limit the kernel to the low 512 MB range + * due to mapping restrictions. + * + * 64-bit kdump kernels need to be restricted to be under 64 TB, which is + * the upper limit of system RAM in 4-level paging mode. Since the kdump + * jump could be from 5-level paging to 4-level paging, the jump will fail if + * the kernel is put above 64 TB, and during the 1st kernel bootup there's + * no good way to detect the paging mode of the target kernel which will be + * loaded for dumping. 
+ */ +#ifdef CONFIG_X86_32 +# define CRASH_ADDR_LOW_MAXSZ_512M +# define CRASH_ADDR_HIGH_MAX SZ_512M +#else +# define CRASH_ADDR_LOW_MAXSZ_4G +# define CRASH_ADDR_HIGH_MAX SZ_64T +#endif + #ifndef __ASSEMBLY__ #include @@ -200,6 +221,10 @@ typedef void crash_vmclear_fn(void); extern crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss; extern void kdump_nmi_shootdown_cpus(void); +#ifdef CONFIG_KEXEC_CORE +extern void __init reserve_crashkernel(void); +#endif + #endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_KEXEC_H */ diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 71a6a6e7ca5b..927ba83e8da4 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -22,8 +22,6 @@ #include -#include - #include #include #include @@ -35,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -396,165 +395,7 @@ static void __init memblock_x86_reserve_range_setup_data(void) } } -/* - * - Crashkernel reservation -- - */ - -#ifdef CONFIG_KEXEC_CORE - -/* - * Keep the crash kernel below this limit. - * - * Earlier 32-bits kernels would limit the kernel to the low 512 MB range - * due to mapping restrictions. - * - * 64-bit kdump kernels need to be restricted to be under 64 TB, which is - * the upper limit of system RAM in 4-level paging mode. Since the kdump - * jump could be from 5-level paging to 4-level paging, the jump will fail if - * the kernel is put above 64 TB, and during the 1st kernel bootup there's - * no good way to detect the paging mode of the target kernel which will be - * loaded for dumping. 
- */ -#ifdef CONFIG_X86_32 -# define CRASH_ADDR_LOW_MAXSZ_512M -# define CRASH_ADDR_HIGH_MAX SZ_512M -#else -# define CRASH_ADDR_LOW_MAXSZ_4G -# define CRASH_ADDR_HIGH_MAX SZ_64T -#endif - -static int __init reserve_crashkernel_low(void) -{ -#ifdef CONFIG_X86_64 - unsigned long long base, low_base = 0, low_size = 0; - unsigned long total_low_mem; - int ret; - - total_low_mem = memblock_mem_size(CRASH_ADDR_LOW_MAX >> PAGE_SHIFT); - - /* crashkernel=Y,low */ - ret = parse_crashkernel_low(boot_command_line, total_low_mem, _size, ); - if (ret) { - /* -* two parts from kernel/dma/swiotlb.c: -* -swiotlb size: user-specified with swiotlb= or default. -* -* -swiotlb overflow buffer: now hardcoded to 32k. We round it -* to 8M for other buffers that may need to stay low too. Also -* make sure we allocate enough extra low memory so that we -* don't run out of DMA buffers for 32-bit devices. -*/ - low_size = max(swiotlb_size_or_default() + (8UL << 20), 256UL << 20); - } else { - /* passed with crashkernel=0,low ? */ - if (!low_size) - return 0; - } - - low_base = memblock_find_in_range(CRASH_ALIGN, CRASH_ADDR_LOW_MAX, low_size, CRASH_ALIGN); - if (!low_base) { - pr_err("Cannot reserve %ldMB crashkernel low memory, please try smaller size.\n", - (unsigned long)(low_size >> 20)); - return -ENOMEM; - } - - ret = memblock_reserve(low_base, low_size); - if (ret) { - pr_err("%s: Error reserving crashkernel low memblock.\n", __func__); - return ret; - } - - pr_info("Reserving %ldMB of low memory at
[PATCH v12 5/9] arm64: kdump: introduce some macros for crash kernel reservation
Introduce macro CRASH_ALIGN for alignment, macro CRASH_ADDR_LOW_MAX for the upper bound of low crash memory, and macro CRASH_ADDR_HIGH_MAX for the upper bound of high crash memory, and use these macros instead of hard-coded values. Besides, to stay consistent with x86, use CRASH_ALIGN as the lower bound of crash kernel reservation. Signed-off-by: Chen Zhou --- arch/arm64/include/asm/kexec.h | 6 ++ arch/arm64/include/asm/processor.h | 1 + arch/arm64/mm/init.c | 8 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h index d24b527e8c00..402d208265a3 100644 --- a/arch/arm64/include/asm/kexec.h +++ b/arch/arm64/include/asm/kexec.h @@ -25,6 +25,12 @@ #define KEXEC_ARCH KEXEC_ARCH_AARCH64 +/* 2M alignment for crash kernel regions */ +#define CRASH_ALIGNSZ_2M + +#define CRASH_ADDR_LOW_MAX arm64_dma32_phys_limit +#define CRASH_ADDR_HIGH_MAXMEMBLOCK_ALLOC_ACCESSIBLE + #ifndef __ASSEMBLY__ /** diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 240fe5e5b720..af71063f352c 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -95,6 +95,7 @@ #endif /* CONFIG_ARM64_FORCE_52BIT */ extern phys_addr_t arm64_dma_phys_limit; +extern phys_addr_t arm64_dma32_phys_limit; #define ARCH_LOW_ADDRESS_LIMIT (arm64_dma_phys_limit - 1) struct debug_info { diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 481d22c32a2e..ad27dc4cc55e 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -67,7 +67,7 @@ EXPORT_SYMBOL(vmemmap); * bit addressable memory area. 
*/ phys_addr_t arm64_dma_phys_limit __ro_after_init; -static phys_addr_t arm64_dma32_phys_limit __ro_after_init; +phys_addr_t arm64_dma32_phys_limit __ro_after_init; #ifdef CONFIG_KEXEC_CORE /* @@ -92,8 +92,8 @@ static void __init reserve_crashkernel(void) if (crash_base == 0) { /* Current arm64 boot protocol requires 2MB alignment */ - crash_base = memblock_find_in_range(0, arm64_dma32_phys_limit, - crash_size, SZ_2M); + crash_base = memblock_find_in_range(CRASH_ALIGN, CRASH_ADDR_LOW_MAX, + crash_size, CRASH_ALIGN); if (crash_base == 0) { pr_warn("cannot allocate crashkernel (size:0x%llx)\n", crash_size); @@ -111,7 +111,7 @@ static void __init reserve_crashkernel(void) return; } - if (!IS_ALIGNED(crash_base, SZ_2M)) { + if (!IS_ALIGNED(crash_base, CRASH_ALIGN)) { pr_warn("cannot reserve crashkernel: base address is not 2MB aligned\n"); return; } -- 2.20.1
[PATCH v12 6/9] arm64: kdump: reimplement crashkernel=X
There are the following issues in arm64 kdump: 1. We use crashkernel=X to reserve crashkernel below 4G, which will fail when there is not enough low memory. 2. If the crashkernel is reserved above 4G, the crash dump kernel will fail to boot because there is no low memory available for allocation. 3. Since commit 1a8e1cef7603 ("arm64: use both ZONE_DMA and ZONE_DMA32"), if the memory reserved for the crash dump kernel falls in ZONE_DMA32, devices in the crash dump kernel that need to use ZONE_DMA will fail to allocate. To solve these issues, change the behavior of crashkernel=X and introduce crashkernel=X,[high,low]. crashkernel=X tries low allocation in the DMA zone, and falls back to high allocation if it fails. We can also use "crashkernel=X,high" to select a region above the DMA zone, which also tries to allocate at least 256M in the DMA zone automatically. "crashkernel=Y,low" can be used to allocate a specified size of low memory. For non-RPi4 platforms, replace the DMA zone mentioned above with the DMA32 zone. Another minor change: there may be two regions reserved for the crash dump kernel. In order to distinguish the low region from the high region without affecting existing kexec-tools, rename the low region to "Crash kernel (low)". 
Signed-off-by: Chen Zhou --- arch/arm64/include/asm/kexec.h | 9 + arch/arm64/kernel/setup.c | 13 +++- arch/arm64/mm/init.c | 60 ++ arch/arm64/mm/mmu.c| 4 +++ kernel/crash_core.c| 8 +++-- 5 files changed, 34 insertions(+), 60 deletions(-) diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h index 402d208265a3..79909ae5e22e 100644 --- a/arch/arm64/include/asm/kexec.h +++ b/arch/arm64/include/asm/kexec.h @@ -28,7 +28,12 @@ /* 2M alignment for crash kernel regions */ #define CRASH_ALIGNSZ_2M +#ifdef CONFIG_ZONE_DMA +#define CRASH_ADDR_LOW_MAX arm64_dma_phys_limit +#else #define CRASH_ADDR_LOW_MAX arm64_dma32_phys_limit +#endif + #define CRASH_ADDR_HIGH_MAXMEMBLOCK_ALLOC_ACCESSIBLE #ifndef __ASSEMBLY__ @@ -96,6 +101,10 @@ static inline void crash_prepare_suspend(void) {} static inline void crash_post_resume(void) {} #endif +#ifdef CONFIG_KEXEC_CORE +extern void __init reserve_crashkernel(void); +#endif + #ifdef CONFIG_KEXEC_FILE #define ARCH_HAS_KIMAGE_ARCH diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 53acbeca4f57..1b24072f2bae 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -238,7 +238,18 @@ static void __init request_standard_resources(void) kernel_data.end <= res->end) request_resource(res, _data); #ifdef CONFIG_KEXEC_CORE - /* Userspace will find "Crash kernel" region in /proc/iomem. */ + /* +* Userspace will find "Crash kernel" or "Crash kernel (low)" +* region in /proc/iomem. +* In order to distinct from the high region and make no effect +* to the use of existing kexec-tools, rename the low region as +* "Crash kernel (low)". 
+*/ + if (crashk_low_res.end && crashk_low_res.start >= res->start && + crashk_low_res.end <= res->end) { + crashk_low_res.name = "Crash kernel (low)"; + request_resource(res, _low_res); + } if (crashk_res.end && crashk_res.start >= res->start && crashk_res.end <= res->end) request_resource(res, _res); diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index ad27dc4cc55e..e56a0e5d5b77 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -69,66 +70,11 @@ EXPORT_SYMBOL(vmemmap); phys_addr_t arm64_dma_phys_limit __ro_after_init; phys_addr_t arm64_dma32_phys_limit __ro_after_init; -#ifdef CONFIG_KEXEC_CORE -/* - * reserve_crashkernel() - reserves memory for crash kernel - * - * This function reserves memory area given in "crashkernel=" kernel command - * line parameter. The memory reserved is used by dump capture kernel when - * primary kernel is crashing. - */ -static void __init reserve_crashkernel(void) -{ - unsigned long long crash_base, crash_size; - int ret; - - ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), - _size, _base); - /* no crashkernel= or invalid value specified */ - if (ret || !crash_size) - return; - - crash_size = PAGE_ALIGN(crash_size); - - if (crash_base == 0) { - /* Current arm64 boot protocol requires 2MB alignment */ - c
[PATCH v12 0/9] support reserving crashkernel above 4G on arm64 kdump
n a separate patch. Changes since [v1]: - Move common reserve_crashkernel_low() code into kernel/kexec_core.c. - Remove memblock_cap_memory_ranges() i added in v1 and implement that in fdt_enforce_memory_region(). There are at most two crash kernel regions, for two crash kernel regions case, we cap the memory range [min(regs[*].start), max(regs[*].end)] and then remove the memory range in the middle. [1]: http://lists.infradead.org/pipermail/kexec/2020-June/020737.html [2]: https://github.com/robherring/dt-schema/pull/19 [v1]: https://lkml.org/lkml/2019/4/2/1174 [v2]: https://lkml.org/lkml/2019/4/9/86 [v3]: https://lkml.org/lkml/2019/4/9/306 [v4]: https://lkml.org/lkml/2019/4/15/273 [v5]: https://lkml.org/lkml/2019/5/6/1360 [v6]: https://lkml.org/lkml/2019/8/30/142 [v7]: https://lkml.org/lkml/2019/12/23/411 [v8]: https://lkml.org/lkml/2020/5/21/213 [v9]: https://lkml.org/lkml/2020/6/28/73 [v10]: https://lkml.org/lkml/2020/7/2/1443 [v11]: https://lkml.org/lkml/2020/8/1/150 Chen Zhou (9): x86: kdump: move CRASH_ALIGN to 2M x86: kdump: make the lower bound of crash kernel reservation consistent x86: kdump: use macro CRASH_ADDR_LOW_MAX in functions reserve_crashkernel[_low]() x86: kdump: move reserve_crashkernel[_low]() into crash_core.c arm64: kdump: introduce some macroes for crash kernel reservation arm64: kdump: reimplement crashkernel=X kdump: add threshold for the required memory arm64: kdump: add memory for devices by DT property linux,usable-memory-range kdump: update Documentation about crashkernel Documentation/admin-guide/kdump/kdump.rst | 25 ++- .../admin-guide/kernel-parameters.txt | 13 +- arch/arm64/include/asm/kexec.h| 15 ++ arch/arm64/include/asm/processor.h| 1 + arch/arm64/kernel/setup.c | 13 +- arch/arm64/mm/init.c | 105 -- arch/arm64/mm/mmu.c | 4 + arch/x86/include/asm/kexec.h | 28 +++ arch/x86/kernel/setup.c | 165 +-- include/linux/crash_core.h| 4 + include/linux/kexec.h | 2 - kernel/crash_core.c | 192 ++ kernel/kexec_core.c | 17 -- 13 files 
changed, 328 insertions(+), 256 deletions(-) -- 2.20.1
[PATCH v12 7/9] kdump: add threshold for the required memory
For crashkernel=X, if the required size X is too large and leaves very little free low memory after low allocation, the system may not work normally. So add a threshold and go for high allocation directly if the required size is too large. The threshold is set to half of the low memory. Signed-off-by: Chen Zhou --- kernel/crash_core.c | 11 ++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/kernel/crash_core.c b/kernel/crash_core.c index 3f735cb37ace..d11d597a470d 100644 --- a/kernel/crash_core.c +++ b/kernel/crash_core.c @@ -378,6 +378,15 @@ int __init reserve_crashkernel_low(void) } #if defined(CONFIG_X86) || defined(CONFIG_ARM64) + +/* + * Add a threshold for required memory size of crashkernel. If required memory + * size is greater than threshold, just go for high allocation directly. The + * value of threshold is set as half of the total low memory. + */ +#define REQUIRED_MEMORY_THRESHOLD (memblock_mem_size(CRASH_ADDR_LOW_MAX >> \ + PAGE_SHIFT) >> 1) + #ifdef CONFIG_KEXEC_CORE /* * reserve_crashkernel() - reserves memory for crash kernel @@ -422,7 +431,7 @@ void __init reserve_crashkernel(void) * So try low memory first and fall back to high memory * unless "crashkernel=size[KMG],high" is specified. */ - if (!high) + if (!high && crash_size <= REQUIRED_MEMORY_THRESHOLD) crash_base = memblock_find_in_range(CRASH_ALIGN, CRASH_ADDR_LOW_MAX, crash_size, CRASH_ALIGN); -- 2.20.1
[PATCH v12 3/9] x86: kdump: use macro CRASH_ADDR_LOW_MAX in functions reserve_crashkernel[_low]()
To make the functions reserve_crashkernel[_low]() generic, replace some hard-coded numbers with the macro CRASH_ADDR_LOW_MAX. Signed-off-by: Chen Zhou --- arch/x86/kernel/setup.c | 11 ++- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index d7fd90c52dae..71a6a6e7ca5b 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -430,7 +430,7 @@ static int __init reserve_crashkernel_low(void) unsigned long total_low_mem; int ret; - total_low_mem = memblock_mem_size(1UL << (32 - PAGE_SHIFT)); + total_low_mem = memblock_mem_size(CRASH_ADDR_LOW_MAX >> PAGE_SHIFT); /* crashkernel=Y,low */ ret = parse_crashkernel_low(boot_command_line, total_low_mem, _size, ); @@ -451,7 +451,7 @@ static int __init reserve_crashkernel_low(void) return 0; } - low_base = memblock_find_in_range(CRASH_ALIGN, 1ULL << 32, low_size, CRASH_ALIGN); + low_base = memblock_find_in_range(CRASH_ALIGN, CRASH_ADDR_LOW_MAX, low_size, CRASH_ALIGN); if (!low_base) { pr_err("Cannot reserve %ldMB crashkernel low memory, please try smaller size.\n", (unsigned long)(low_size >> 20)); @@ -504,8 +504,9 @@ static void __init reserve_crashkernel(void) if (!crash_base) { /* * Set CRASH_ADDR_LOW_MAX upper bound for crash memory, -* crashkernel=x,high reserves memory over 4G, also allocates -* 256M extra low memory for DMA buffers and swiotlb. +* crashkernel=x,high reserves memory over CRASH_ADDR_LOW_MAX, +* also allocates 256M extra low memory for DMA buffers +* and swiotlb. * But the extra memory is not required for all machines. * So try low memory first and fall back to high memory * unless "crashkernel=size[KMG],high" is specified. @@ -539,7 +540,7 @@ static void __init reserve_crashkernel(void) return; } - if (crash_base >= (1ULL << 32) && reserve_crashkernel_low()) { + if (crash_base >= CRASH_ADDR_LOW_MAX && reserve_crashkernel_low()) { memblock_free(crash_base, crash_size); return; } -- 2.20.1
[PATCH v12 8/9] arm64: kdump: add memory for devices by DT property linux,usable-memory-range
When reserving crashkernel in high memory, some low memory is reserved for crash dump kernel devices and never mapped by the first kernel. This memory range is advertised to crash dump kernel via DT property under /chosen, linux,usable-memory-range = We reused the DT property linux,usable-memory-range and made the low memory region as the second range "BASE2 SIZE2", which keeps compatibility with existing user-space and older kdump kernels. Crash dump kernel reads this property at boot time and call memblock_add() to add the low memory region after memblock_cap_memory_range() has been called. Signed-off-by: Chen Zhou --- arch/arm64/mm/init.c | 43 +-- 1 file changed, 33 insertions(+), 10 deletions(-) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index e56a0e5d5b77..2af8c38279d9 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -76,6 +76,15 @@ static void __init reserve_crashkernel(void) } #endif +/* + * The main usage of linux,usable-memory-range is for crash dump kernel. + * Originally, the number of usable-memory regions is one. Now there may + * be two regions, low region and high region. + * To make compatibility with existing user-space and older kdump, the low + * region is always the last range of linux,usable-memory-range if exist. 
+ */ +#define MAX_USABLE_RANGES 2 + #ifdef CONFIG_CRASH_DUMP static int __init early_init_dt_scan_elfcorehdr(unsigned long node, const char *uname, int depth, void *data) @@ -191,9 +200,9 @@ early_param("mem", early_mem); static int __init early_init_dt_scan_usablemem(unsigned long node, const char *uname, int depth, void *data) { - struct memblock_region *usablemem = data; - const __be32 *reg; - int len; + struct memblock_region *usable_rgns = data; + const __be32 *reg, *endp; + int len, nr = 0; if (depth != 1 || strcmp(uname, "chosen") != 0) return 0; @@ -202,22 +211,36 @@ static int __init early_init_dt_scan_usablemem(unsigned long node, if (!reg || (len < (dt_root_addr_cells + dt_root_size_cells))) return 1; - usablemem->base = dt_mem_next_cell(dt_root_addr_cells, ); - usablemem->size = dt_mem_next_cell(dt_root_size_cells, ); + endp = reg + (len / sizeof(__be32)); + while ((endp - reg) >= (dt_root_addr_cells + dt_root_size_cells)) { + usable_rgns[nr].base = dt_mem_next_cell(dt_root_addr_cells, ); + usable_rgns[nr].size = dt_mem_next_cell(dt_root_size_cells, ); + + if (++nr >= MAX_USABLE_RANGES) + break; + } return 1; } static void __init fdt_enforce_memory_region(void) { - struct memblock_region reg = { - .size = 0, + struct memblock_region usable_rgns[MAX_USABLE_RANGES] = { + { .size = 0 }, + { .size = 0 } }; - of_scan_flat_dt(early_init_dt_scan_usablemem, ); + of_scan_flat_dt(early_init_dt_scan_usablemem, _rgns); - if (reg.size) - memblock_cap_memory_range(reg.base, reg.size); + /* +* The first range of usable-memory regions is for crash dump +* kernel with only one region or for high region with two regions, +* the second range is dedicated for low region if exist. +*/ + if (usable_rgns[0].size) + memblock_cap_memory_range(usable_rgns[0].base, usable_rgns[0].size); + if (usable_rgns[1].size) + memblock_add(usable_rgns[1].base, usable_rgns[1].size); } void __init arm64_memblock_init(void) -- 2.20.1
[PATCH v11 4/5] arm64: kdump: add memory for devices by DT property linux,usable-memory-range
When reserving crashkernel in high memory, some low memory is reserved for crash dump kernel devices and never mapped by the first kernel. This memory range is advertised to crash dump kernel via DT property under /chosen, linux,usable-memory-range = <BASE1 SIZE1 [BASE2 SIZE2]> We reused the DT property linux,usable-memory-range and made the low memory region as the second range "BASE2 SIZE2", which keeps compatibility with existing user-space and older kdump kernels. Crash dump kernel reads this property at boot time and calls memblock_add() to add the low memory region after memblock_cap_memory_range() has been called. Signed-off-by: Chen Zhou --- arch/arm64/mm/init.c | 44 ++-- 1 file changed, 34 insertions(+), 10 deletions(-) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 53c8916fd32f..f385a8281d1b 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -69,6 +69,16 @@ EXPORT_SYMBOL(vmemmap); phys_addr_t arm64_dma_phys_limit __ro_after_init; phys_addr_t arm64_dma32_phys_limit __ro_after_init; +/* + * The main usage of linux,usable-memory-range is for crash dump kernel. + * Originally, the number of usable-memory regions is one. Now there may + * be two regions, low region and high region. + * To make compatibility with existing user-space and older kdump, the low + * region is always the last range of linux,usable-memory-range if exist. 
+ */ +#define MAX_USABLE_RANGES 2 + + #ifdef CONFIG_KEXEC_CORE /* @@ -286,9 +296,9 @@ early_param("mem", early_mem); static int __init early_init_dt_scan_usablemem(unsigned long node, const char *uname, int depth, void *data) { - struct memblock_region *usablemem = data; - const __be32 *reg; - int len; + struct memblock_region *usable_rgns = data; + const __be32 *reg, *endp; + int len, nr = 0; if (depth != 1 || strcmp(uname, "chosen") != 0) return 0; @@ -297,22 +307,36 @@ static int __init early_init_dt_scan_usablemem(unsigned long node, if (!reg || (len < (dt_root_addr_cells + dt_root_size_cells))) return 1; - usablemem->base = dt_mem_next_cell(dt_root_addr_cells, ); - usablemem->size = dt_mem_next_cell(dt_root_size_cells, ); + endp = reg + (len / sizeof(__be32)); + while ((endp - reg) >= (dt_root_addr_cells + dt_root_size_cells)) { + usable_rgns[nr].base = dt_mem_next_cell(dt_root_addr_cells, ); + usable_rgns[nr].size = dt_mem_next_cell(dt_root_size_cells, ); + + if (++nr >= MAX_USABLE_RANGES) + break; + } return 1; } static void __init fdt_enforce_memory_region(void) { - struct memblock_region reg = { - .size = 0, + struct memblock_region usable_rgns[MAX_USABLE_RANGES] = { + { .size = 0 }, + { .size = 0 } }; - of_scan_flat_dt(early_init_dt_scan_usablemem, ); + of_scan_flat_dt(early_init_dt_scan_usablemem, _rgns); - if (reg.size) - memblock_cap_memory_range(reg.base, reg.size); + /* +* The first range of usable-memory regions is for crash dump +* kernel with only one region or for high region with two regions, +* the second range is dedicated for low region if exist. +*/ + if (usable_rgns[0].size) + memblock_cap_memory_range(usable_rgns[0].base, usable_rgns[0].size); + if (usable_rgns[1].size) + memblock_add(usable_rgns[1].base, usable_rgns[1].size); } void __init arm64_memblock_init(void) -- 2.20.1
[PATCH v11 1/5] arm64: kdump: add macro CRASH_ALIGN and CRASH_ADDR_LOW_MAX
Expose variable arm64_dma32_phys_limit for followup, and add macro CRASH_ALIGN for alignment, macro CRASH_ADDR_LOW_MAX for upper bound of low crash memory. Use macros instead. Signed-off-by: Chen Zhou --- arch/arm64/include/asm/kexec.h | 5 + arch/arm64/include/asm/processor.h | 1 + arch/arm64/mm/init.c | 8 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h index d24b527e8c00..1a2f27f12794 100644 --- a/arch/arm64/include/asm/kexec.h +++ b/arch/arm64/include/asm/kexec.h @@ -25,6 +25,11 @@ #define KEXEC_ARCH KEXEC_ARCH_AARCH64 +/* 2M alignment for crash kernel regions */ +#define CRASH_ALIGNSZ_2M + +#define CRASH_ADDR_LOW_MAX arm64_dma32_phys_limit + #ifndef __ASSEMBLY__ /** diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 240fe5e5b720..af71063f352c 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -95,6 +95,7 @@ #endif /* CONFIG_ARM64_FORCE_52BIT */ extern phys_addr_t arm64_dma_phys_limit; +extern phys_addr_t arm64_dma32_phys_limit; #define ARCH_LOW_ADDRESS_LIMIT (arm64_dma_phys_limit - 1) struct debug_info { diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 1e93cfc7c47a..a3d0193f6a0a 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -67,7 +67,7 @@ EXPORT_SYMBOL(vmemmap); * bit addressable memory area. 
*/ phys_addr_t arm64_dma_phys_limit __ro_after_init; -static phys_addr_t arm64_dma32_phys_limit __ro_after_init; +phys_addr_t arm64_dma32_phys_limit __ro_after_init; #ifdef CONFIG_KEXEC_CORE /* @@ -92,8 +92,8 @@ static void __init reserve_crashkernel(void) if (crash_base == 0) { /* Current arm64 boot protocol requires 2MB alignment */ - crash_base = memblock_find_in_range(0, arm64_dma32_phys_limit, - crash_size, SZ_2M); + crash_base = memblock_find_in_range(0, CRASH_ADDR_LOW_MAX, + crash_size, CRASH_ALIGN); if (crash_base == 0) { pr_warn("cannot allocate crashkernel (size:0x%llx)\n", crash_size); @@ -111,7 +111,7 @@ static void __init reserve_crashkernel(void) return; } - if (!IS_ALIGNED(crash_base, SZ_2M)) { + if (!IS_ALIGNED(crash_base, CRASH_ALIGN)) { pr_warn("cannot reserve crashkernel: base address is not 2MB aligned\n"); return; } -- 2.20.1
[PATCH v11 5/5] kdump: update Documentation about crashkernel
Now the behavior of crashkernel=X has been changed, which tries low allocation in ZONE_DMA, and fall back to high allocation if it fails. If requized size X is too large and leads to very little free memory in ZONE_DMA after low allocation, the system may not work well. So add a threshold and go for high allocation directly if the required size is too large. The threshold is set as the half of low memory. If crash_base is outside ZONE_DMA, try to allocate at least 256M in ZONE_DMA automatically. "crashkernel=Y,low" can be used to allocate specified size low memory. For non-RPi4 platforms, change ZONE_DMA memtioned above to ZONE_DMA32. So update the Documentation. Signed-off-by: Chen Zhou --- Documentation/admin-guide/kdump/kdump.rst | 21 --- .../admin-guide/kernel-parameters.txt | 11 -- 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/Documentation/admin-guide/kdump/kdump.rst b/Documentation/admin-guide/kdump/kdump.rst index 2da65fef2a1c..4b58f97351d5 100644 --- a/Documentation/admin-guide/kdump/kdump.rst +++ b/Documentation/admin-guide/kdump/kdump.rst @@ -299,7 +299,15 @@ Boot into System Kernel "crashkernel=64M@16M" tells the system kernel to reserve 64 MB of memory starting at physical address 0x0100 (16MB) for the dump-capture kernel. - On x86 and x86_64, use "crashkernel=64M@16M". + On x86 use "crashkernel=64M@16M". + + On x86_64, use "crashkernel=X" to select a region under 4G first, and + fall back to reserve region above 4G. + We can also use "crashkernel=X,high" to select a region above 4G, which + also tries to allocate at least 256M below 4G automatically and + "crashkernel=Y,low" can be used to allocate specified size low memory. + Use "crashkernel=Y@X" if you really have to reserve memory from specified + start address X. On ppc64, use "crashkernel=128M@32M". @@ -316,8 +324,15 @@ Boot into System Kernel kernel will automatically locate the crash kernel image within the first 512MB of RAM if X is not given. 
- On arm64, use "crashkernel=Y[@X]". Note that the start address of - the kernel, X if explicitly specified, must be aligned to 2MiB (0x20). + On arm64, use "crashkernel=X" to try low allocation in ZONE_DMA, and + fall back to high allocation if it fails. And go for high allocation + directly if the required size is too large. If crash_base is outside + ZONE_DMA, try to allocate at least 256M in ZONE_DMA automatically. + "crashkernel=Y,low" can be used to allocate specified size low memory. + For non-RPi4 platforms, change ZONE_DMA memtioned above to ZONE_DMA32. + Use "crashkernel=Y@X" if you really have to reserve memory from + specified start address X. Note that the start address of the kernel, + X if explicitly specified, must be aligned to 2MiB (0x20). Load the Dump-capture Kernel diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index fb95fad81c79..d1b6016850d6 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -722,6 +722,10 @@ [KNL, x86_64] select a region under 4G first, and fall back to reserve region above 4G when '@offset' hasn't been specified. + [KNL, arm64] Try low allocation in ZONE_DMA, fall back + to high allocation if it fails when '@offset' hasn't been + specified. For non-RPi4 platforms, change ZONE_DMA to + ZONE_DMA32. See Documentation/admin-guide/kdump/kdump.rst for further details. crashkernel=range1:size1[,range2:size2,...][@offset] @@ -746,13 +750,16 @@ requires at least 64M+32K low memory, also enough extra low memory is needed to make sure DMA buffers for 32-bit devices won't run out. Kernel would try to allocate at - at least 256M below 4G automatically. + least 256M below 4G automatically. This one let user to specify own low range under 4G for second kernel instead. 0: to disable low allocation. It will be ignored when crashkernel=X,high is not used or memory reserved is below 4G. 
- + [KNL, arm64] range under 4G. + This one let user to specify a low range in ZONE_DMA for + crash dump kernel. For non-RPi4 platforms, change ZONE_DMA + to ZONE_DMA32. cryptomgr.notests [KNL] Disable crypto self-tests -- 2.20.1
[PATCH v11 2/5] x86: kdump: move reserve_crashkernel_low() into crash_core.c
In preparation for supporting reserve_crashkernel_low in arm64 as x86_64 does, move reserve_crashkernel_low() into kernel/crash_core.c. BTW, move x86_64 CRASH_ALIGN to 2M suggested by Dave. CONFIG_PHYSICAL_ALIGN can be selected from 2M to 16M, move to the same as arm64. Signed-off-by: Chen Zhou --- arch/x86/include/asm/kexec.h | 24 ++ arch/x86/kernel/setup.c | 86 +++- include/linux/crash_core.h | 3 ++ include/linux/kexec.h| 2 - kernel/crash_core.c | 74 +++ kernel/kexec_core.c | 17 --- 6 files changed, 107 insertions(+), 99 deletions(-) diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h index 6802c59e8252..f8f9d952e09f 100644 --- a/arch/x86/include/asm/kexec.h +++ b/arch/x86/include/asm/kexec.h @@ -18,6 +18,30 @@ # define KEXEC_CONTROL_CODE_MAX_SIZE 2048 +/* 2M alignment for crash kernel regions */ +#define CRASH_ALIGNSZ_2M + +/* + * Keep the crash kernel below this limit. + * + * Earlier 32-bits kernels would limit the kernel to the low 512 MB range + * due to mapping restrictions. + * + * 64-bit kdump kernels need to be restricted to be under 64 TB, which is + * the upper limit of system RAM in 4-level paging mode. Since the kdump + * jump could be from 5-level paging to 4-level paging, the jump will fail if + * the kernel is put above 64 TB, and during the 1st kernel bootup there's + * no good way to detect the paging mode of the target kernel which will be + * loaded for dumping. 
+ */ +#ifdef CONFIG_X86_32 +# define CRASH_ADDR_LOW_MAXSZ_512M +# define CRASH_ADDR_HIGH_MAX SZ_512M +#else +# define CRASH_ADDR_LOW_MAXSZ_4G +# define CRASH_ADDR_HIGH_MAX SZ_64T +#endif + #ifndef __ASSEMBLY__ #include diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index a3767e74c758..46763c1e5d9f 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -401,83 +401,6 @@ static void __init memblock_x86_reserve_range_setup_data(void) #ifdef CONFIG_KEXEC_CORE -/* 16M alignment for crash kernel regions */ -#define CRASH_ALIGNSZ_16M - -/* - * Keep the crash kernel below this limit. - * - * Earlier 32-bits kernels would limit the kernel to the low 512 MB range - * due to mapping restrictions. - * - * 64-bit kdump kernels need to be restricted to be under 64 TB, which is - * the upper limit of system RAM in 4-level paging mode. Since the kdump - * jump could be from 5-level paging to 4-level paging, the jump will fail if - * the kernel is put above 64 TB, and during the 1st kernel bootup there's - * no good way to detect the paging mode of the target kernel which will be - * loaded for dumping. - */ -#ifdef CONFIG_X86_32 -# define CRASH_ADDR_LOW_MAXSZ_512M -# define CRASH_ADDR_HIGH_MAX SZ_512M -#else -# define CRASH_ADDR_LOW_MAXSZ_4G -# define CRASH_ADDR_HIGH_MAX SZ_64T -#endif - -static int __init reserve_crashkernel_low(void) -{ -#ifdef CONFIG_X86_64 - unsigned long long base, low_base = 0, low_size = 0; - unsigned long total_low_mem; - int ret; - - total_low_mem = memblock_mem_size(1UL << (32 - PAGE_SHIFT)); - - /* crashkernel=Y,low */ - ret = parse_crashkernel_low(boot_command_line, total_low_mem, _size, ); - if (ret) { - /* -* two parts from kernel/dma/swiotlb.c: -* -swiotlb size: user-specified with swiotlb= or default. -* -* -swiotlb overflow buffer: now hardcoded to 32k. We round it -* to 8M for other buffers that may need to stay low too. 
Also -* make sure we allocate enough extra low memory so that we -* don't run out of DMA buffers for 32-bit devices. -*/ - low_size = max(swiotlb_size_or_default() + (8UL << 20), 256UL << 20); - } else { - /* passed with crashkernel=0,low ? */ - if (!low_size) - return 0; - } - - low_base = memblock_find_in_range(0, 1ULL << 32, low_size, CRASH_ALIGN); - if (!low_base) { - pr_err("Cannot reserve %ldMB crashkernel low memory, please try smaller size.\n", - (unsigned long)(low_size >> 20)); - return -ENOMEM; - } - - ret = memblock_reserve(low_base, low_size); - if (ret) { - pr_err("%s: Error reserving crashkernel low memblock.\n", __func__); - return ret; - } - - pr_info("Reserving %ldMB of low memory at %ldMB for crashkernel (System low RAM: %ldMB)\n", - (unsigned long)(low_size >> 20), - (unsigned long)(low_base >> 20), - (unsigned long)(total_low_mem >> 20)); - - crashk_low_res.start = low_base; - crashk_low_res.end = low_base + low_size - 1; - ins
[PATCH v11 3/5] arm64: kdump: reimplement crashkernel=X
There are the following issues in arm64 kdump: 1. We use crashkernel=X to reserve crashkernel below 4G, which will fail when there is not enough low memory. 2. If crashkernel is reserved above 4G, the crash dump kernel will fail to boot because there is no low memory available for allocation. 3. Since commit 1a8e1cef7603 ("arm64: use both ZONE_DMA and ZONE_DMA32"), if the memory reserved for the crash dump kernel falls in ZONE_DMA32, devices in the crash dump kernel that need ZONE_DMA will fail to allocate. To solve these issues, change the behavior of crashkernel=X. crashkernel=X tries low allocation in ZONE_DMA, and falls back to high allocation if it fails. If the required size X is too large and leaves very little free memory in ZONE_DMA after low allocation, the system may not work normally. So add a threshold and go for high allocation directly if the required size is too large. The threshold is set to half of the low memory. If crash_base is outside ZONE_DMA, try to allocate at least 256M in ZONE_DMA automatically. "crashkernel=Y,low" can be used to allocate a specified size of low memory. For non-RPi4 platforms, change ZONE_DMA mentioned above to ZONE_DMA32. Another minor change: there may be two regions reserved for the crash dump kernel; in order to distinguish the low region from the high region and not affect the use of existing kexec-tools, rename the low region as "Crash kernel (low)". 
Signed-off-by: Chen Zhou --- arch/arm64/include/asm/kexec.h | 4 +++ arch/arm64/kernel/setup.c | 8 +- arch/arm64/mm/init.c | 51 ++ 3 files changed, 57 insertions(+), 6 deletions(-) diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h index 1a2f27f12794..92ed53d0bf21 100644 --- a/arch/arm64/include/asm/kexec.h +++ b/arch/arm64/include/asm/kexec.h @@ -28,7 +28,11 @@ /* 2M alignment for crash kernel regions */ #define CRASH_ALIGNSZ_2M +#ifdef CONFIG_ZONE_DMA +#define CRASH_ADDR_LOW_MAX arm64_dma_phys_limit +#else #define CRASH_ADDR_LOW_MAX arm64_dma32_phys_limit +#endif #ifndef __ASSEMBLY__ diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 93b3844cf442..4dc51a2ac012 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -238,7 +238,13 @@ static void __init request_standard_resources(void) kernel_data.end <= res->end) request_resource(res, _data); #ifdef CONFIG_KEXEC_CORE - /* Userspace will find "Crash kernel" region in /proc/iomem. */ + /* +* Userspace will find "Crash kernel" region in /proc/iomem. +* Note: the low region is renamed as Crash kernel (low). +*/ + if (crashk_low_res.end && crashk_low_res.start >= res->start && + crashk_low_res.end <= res->end) + request_resource(res, _low_res); if (crashk_res.end && crashk_res.start >= res->start && crashk_res.end <= res->end) request_resource(res, _res); diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index a3d0193f6a0a..53c8916fd32f 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -70,6 +70,14 @@ phys_addr_t arm64_dma_phys_limit __ro_after_init; phys_addr_t arm64_dma32_phys_limit __ro_after_init; #ifdef CONFIG_KEXEC_CORE + +/* + * Add a threshold for required memory size of crashkernel. If required memory + * size is greater than threshold, just go for high allocation directly. The + * value of threshold is set as half of the total low memory. 
+ */ +#define REQUIRED_MEMORY_THRESHOLD (memblock_mem_size(CRASH_ADDR_LOW_MAX >> \ + PAGE_SHIFT) >> 1) /* * reserve_crashkernel() - reserves memory for crash kernel * @@ -90,11 +98,22 @@ static void __init reserve_crashkernel(void) crash_size = PAGE_ALIGN(crash_size); - if (crash_base == 0) { - /* Current arm64 boot protocol requires 2MB alignment */ - crash_base = memblock_find_in_range(0, CRASH_ADDR_LOW_MAX, - crash_size, CRASH_ALIGN); - if (crash_base == 0) { + if (!crash_base) { + /* +* Current arm64 boot protocol requires 2MB alignment. +* If required memory size is greater than threshold, just go +* for high allocation directly. +* If required memory size is less than or equal to threshold, +* try low allocation firstly, and then fall back to high allocation +* if it fails. +*/ + if (crash_size <= REQUIRED_MEMORY_THRESHOLD) + crash_base = memblock_find_in_range(0, CRASH_ADDR_LOW_MAX, +
[PATCH v11 0/5] support reserving crashkernel above 4G on arm64 kdump
There are the following issues in arm64 kdump: 1. We use crashkernel=X to reserve crashkernel below 4G, which will fail when there is not enough low memory. 2. If crashkernel is reserved above 4G, the crash dump kernel will fail to boot because there is no low memory available for allocation. 3. Since commit 1a8e1cef7603 ("arm64: use both ZONE_DMA and ZONE_DMA32"), if the memory reserved for the crash dump kernel falls in ZONE_DMA32, devices in the crash dump kernel that need ZONE_DMA will fail to allocate. To solve these issues, change the behavior of crashkernel=X. crashkernel=X tries low allocation in ZONE_DMA, and falls back to high allocation if it fails. If the required size X is too large and leaves very little free memory in ZONE_DMA after low allocation, the system may not work normally. So add a threshold and go for high allocation directly if the required size is too large. The threshold is set to half of the low memory. If crash_base is outside ZONE_DMA, try to allocate at least 256M in ZONE_DMA automatically. "crashkernel=Y,low" can be used to allocate a specified size of low memory. For non-RPi4 platforms, change ZONE_DMA mentioned above to ZONE_DMA32. When reserving crashkernel in high memory, some low memory is reserved for crash dump kernel devices. So there may be two regions reserved for the crash dump kernel: one below 4G, the other above 4G. In order to distinguish the low region from the high region and not affect the use of existing kexec-tools, rename the low region as "Crash kernel (low)", and pass the low region by reusing the DT property "linux,usable-memory-range". We made the low memory region the last range of "linux,usable-memory-range" to keep compatibility with existing user-space and older kdump kernels. 
Besides, we need to modify kexec-tools: arm64: support more than one crash kernel regions (see [1]) Another update is the documentation of the DT property 'linux,usable-memory-range': schemas: update 'linux,usable-memory-range' node schema (see [2]) Changes since [v10] - Reimplement crashkernel=X as suggested by Catalin. Many thanks to Catalin. Changes since [v9] - Patch 1 add Acked-by from Dave. - Update patch 5 according to Dave's comments. - Update chosen schema. Changes since [v8] - Reuse DT property "linux,usable-memory-range". Suggested by Rob, reuse DT property "linux,usable-memory-range" to pass the low memory region. - Fix kdump broken with ZONE_DMA reintroduced. - Update chosen schema. Changes since [v7] - Move x86 CRASH_ALIGN to 2M. As suggested by Dave, and after some testing, move x86 CRASH_ALIGN to 2M. - Update Documentation/devicetree/bindings/chosen.txt. Add corresponding documentation to Documentation/devicetree/bindings/chosen.txt as suggested by Arnd. - Add Tested-by from John and pk. Changes since [v6] - Fix build errors reported by kbuild test robot. Changes since [v5] - Move reserve_crashkernel_low() into kernel/crash_core.c. - Delete crashkernel=X,high. - Modify crashkernel=X,low. If crashkernel=X,low is specified simultaneously, reserve the specified size of low memory for crash dump kernel devices first and then reserve memory above 4G. In addition, rename crashk_low_res as "Crash kernel (low)" for arm64, and then pass it to the crash dump kernel by DT property "linux,low-memory-range". - Update Documentation/admin-guide/kdump/kdump.rst. Changes since [v4] - Reimplement memblock_cap_memory_ranges for multiple ranges by Mike. Changes since [v3] - Add memblock_cap_memory_ranges back for multiple ranges. - Fix some compiling warnings. Changes since [v2] - Split patch "arm64: kdump: support reserving crashkernel above 4G" into two. Put "move reserve_crashkernel_low() into kexec_core.c" in a separate patch. 
Changes since [v1]: - Move common reserve_crashkernel_low() code into kernel/kexec_core.c. - Remove memblock_cap_memory_ranges() i added in v1 and implement that in fdt_enforce_memory_region(). There are at most two crash kernel regions, for two crash kernel regions case, we cap the memory range [min(regs[*].start), max(regs[*].end)] and then remove the memory range in the middle. [1]: http://lists.infradead.org/pipermail/kexec/2020-June/020737.html [2]: https://github.com/robherring/dt-schema/pull/19 [v1]: https://lkml.org/lkml/2019/4/2/1174 [v2]: https://lkml.org/lkml/2019/4/9/86 [v3]: https://lkml.org/lkml/2019/4/9/306 [v4]: https://lkml.org/lkml/2019/4/15/273 [v5]: https://lkml.org/lkml/2019/5/6/1360 [v6]: https://lkml.org/lkml/2019/8/30/142 [v7]: https://lkml.org/lkml/2019/12/23/411 [v8]: https://lkml.org/lkml/2020/5/21/213 [v9]: https://lkml.org/lkml/2020/6/28/73 [v10]: https://lkml.org/lkml/2020/7/2/1443 Chen Zhou (5): arm64: kdump: add macro CRASH_ALIGN and CRASH_ADDR_LOW_MAX x86: kdump: move reserve_crashkernel_low() into crash_core.c arm64: kdump: reimplement crashkernel=X arm64: kdump: add memory for devices by DT property linux,usable-mem
[PATCH v10 3/5] arm64: kdump: add memory for devices by DT property linux,usable-memory-range
If we want to reserve crashkernel above 4G, we can use the parameters "crashkernel=X crashkernel=Y,low". In this case, the specified size of low memory is reserved for crash dump kernel devices and never mapped by the first kernel. This memory range is advertised to crash dump kernel via DT property under /chosen, linux,usable-memory-range = <BASE1 SIZE1 [BASE2 SIZE2]> We reused the DT property linux,usable-memory-range and made the low memory region as the second range "BASE2 SIZE2", which keeps compatibility with existing user-space and older kdump kernels. Crash dump kernel reads this property at boot time and calls memblock_add() to add the low memory region after memblock_cap_memory_range() has been called. Signed-off-by: Chen Zhou Tested-by: John Donnelly Tested-by: Prabhakar Kushwaha --- arch/arm64/mm/init.c | 43 +-- 1 file changed, 33 insertions(+), 10 deletions(-) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index ce7ced85f5fb..f5b31e8f1f34 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -69,6 +69,15 @@ EXPORT_SYMBOL(vmemmap); phys_addr_t arm64_dma_phys_limit __ro_after_init; static phys_addr_t arm64_dma32_phys_limit __ro_after_init; +/* + * The main usage of linux,usable-memory-range is for crash dump kernel. + * Originally, the number of usable-memory regions is one. Now crash dump + * kernel support at most two regions, low region and high region. + * To make compatibility with existing user-space and older kdump, the low + * region is always the last range of linux,usable-memory-range if exist. 
+ */ +#define MAX_USABLE_RANGES 2 + #ifdef CONFIG_KEXEC_CORE /* * reserve_crashkernel() - reserves memory for crash kernel @@ -272,9 +281,9 @@ early_param("mem", early_mem); static int __init early_init_dt_scan_usablemem(unsigned long node, const char *uname, int depth, void *data) { - struct memblock_region *usablemem = data; - const __be32 *reg; - int len; + struct memblock_region *usable_rgns = data; + const __be32 *reg, *endp; + int len, nr = 0; if (depth != 1 || strcmp(uname, "chosen") != 0) return 0; @@ -283,22 +292,36 @@ static int __init early_init_dt_scan_usablemem(unsigned long node, if (!reg || (len < (dt_root_addr_cells + dt_root_size_cells))) return 1; - usablemem->base = dt_mem_next_cell(dt_root_addr_cells, ); - usablemem->size = dt_mem_next_cell(dt_root_size_cells, ); + endp = reg + (len / sizeof(__be32)); + while ((endp - reg) >= (dt_root_addr_cells + dt_root_size_cells)) { + usable_rgns[nr].base = dt_mem_next_cell(dt_root_addr_cells, ); + usable_rgns[nr].size = dt_mem_next_cell(dt_root_size_cells, ); + + if (++nr >= MAX_USABLE_RANGES) + break; + } return 1; } static void __init fdt_enforce_memory_region(void) { - struct memblock_region reg = { - .size = 0, + struct memblock_region usable_rgns[MAX_USABLE_RANGES] = { + { .size = 0 }, + { .size = 0 } }; - of_scan_flat_dt(early_init_dt_scan_usablemem, ); + of_scan_flat_dt(early_init_dt_scan_usablemem, _rgns); - if (reg.size) - memblock_cap_memory_range(reg.base, reg.size); + /* +* The first range of usable-memory regions is for crash dump +* kernel with only one region or for high region with two regions, +* the second range is dedicated for low region if exist. +*/ + if (usable_rgns[0].size) + memblock_cap_memory_range(usable_rgns[0].base, usable_rgns[0].size); + if (usable_rgns[1].size) + memblock_add(usable_rgns[1].base, usable_rgns[1].size); } void __init arm64_memblock_init(void) -- 2.20.1
[PATCH v10 4/5] arm64: kdump: fix kdump broken with ZONE_DMA reintroduced
commit 1a8e1cef7603 ("arm64: use both ZONE_DMA and ZONE_DMA32") broken the arm64 kdump. If the memory reserved for crash dump kernel falled in ZONE_DMA32, the devices in crash dump kernel need to use ZONE_DMA will alloc fail. This patch addressed the above issue based on "reserving crashkernel above 4G". Originally, we reserve low memory below 4G, and now just need to adjust memory limit to arm64_dma_phys_limit in reserve_crashkernel_low if ZONE_DMA is enabled. That is, if there are devices need to use ZONE_DMA in crash dump kernel, it is a good choice to use parameters "crashkernel=X crashkernel=Y,low". Signed-off-by: Chen Zhou --- kernel/crash_core.c | 7 ++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/kernel/crash_core.c b/kernel/crash_core.c index a7580d291c37..e8ecbbc761a3 100644 --- a/kernel/crash_core.c +++ b/kernel/crash_core.c @@ -320,6 +320,7 @@ int __init reserve_crashkernel_low(void) unsigned long long base, low_base = 0, low_size = 0; unsigned long total_low_mem; int ret; + phys_addr_t crash_max = 1ULL << 32; total_low_mem = memblock_mem_size(1UL << (32 - PAGE_SHIFT)); @@ -352,7 +353,11 @@ int __init reserve_crashkernel_low(void) return 0; } - low_base = memblock_find_in_range(0, 1ULL << 32, low_size, CRASH_ALIGN); +#ifdef CONFIG_ARM64 + if (IS_ENABLED(CONFIG_ZONE_DMA)) + crash_max = arm64_dma_phys_limit; +#endif + low_base = memblock_find_in_range(0, crash_max, low_size, CRASH_ALIGN); if (!low_base) { pr_err("Cannot reserve %ldMB crashkernel low memory, please try smaller size.\n", (unsigned long)(low_size >> 20)); -- 2.20.1
[PATCH v10 2/5] arm64: kdump: reserve crashkernel above 4G for crash dump kernel
Crashkernel=X tries to reserve memory for the crash dump kernel under 4G. If crashkernel=X,low is specified simultaneously, reserve spcified size low memory for crash kdump kernel devices firstly and then reserve memory above 4G. Suggested by James, just introduced crashkernel=X,low to arm64. As memtioned above, if crashkernel=X,low is specified simultaneously, reserve spcified size low memory for crash kdump kernel devices firstly and then reserve memory above 4G, which is much simpler. Signed-off-by: Chen Zhou Tested-by: John Donnelly Tested-by: Prabhakar Kushwaha --- arch/arm64/kernel/setup.c | 8 +++- arch/arm64/mm/init.c | 31 +-- 2 files changed, 36 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 93b3844cf442..4dc51a2ac012 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -238,7 +238,13 @@ static void __init request_standard_resources(void) kernel_data.end <= res->end) request_resource(res, _data); #ifdef CONFIG_KEXEC_CORE - /* Userspace will find "Crash kernel" region in /proc/iomem. */ + /* +* Userspace will find "Crash kernel" region in /proc/iomem. +* Note: the low region is renamed as Crash kernel (low). 
+*/ + if (crashk_low_res.end && crashk_low_res.start >= res->start && + crashk_low_res.end <= res->end) + request_resource(res, _low_res); if (crashk_res.end && crashk_res.start >= res->start && crashk_res.end <= res->end) request_resource(res, _res); diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 1e93cfc7c47a..ce7ced85f5fb 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -81,6 +81,7 @@ static void __init reserve_crashkernel(void) { unsigned long long crash_base, crash_size; int ret; + phys_addr_t crash_max = arm64_dma32_phys_limit; ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), _size, _base); @@ -88,12 +89,38 @@ static void __init reserve_crashkernel(void) if (ret || !crash_size) return; + ret = reserve_crashkernel_low(); + if (!ret && crashk_low_res.end) { + /* +* If crashkernel=X,low specified, there may be two regions, +* we need to make some changes as follows: +* +* 1. rename the low region as "Crash kernel (low)" +* In order to distinct from the high region and make no effect +* to the use of existing kexec-tools, rename the low region as +* "Crash kernel (low)". +* +* 2. change the upper bound for crash memory +* Set MEMBLOCK_ALLOC_ACCESSIBLE upper bound for crash memory. +* +* 3. mark the low region as "nomap" +* The low region is intended to be used for crash dump kernel +* devices, just mark the low region as "nomap" simply. +*/ + const char *rename = "Crash kernel (low)"; + + crashk_low_res.name = rename; + crash_max = MEMBLOCK_ALLOC_ACCESSIBLE; + memblock_mark_nomap(crashk_low_res.start, + resource_size(_low_res)); + } + crash_size = PAGE_ALIGN(crash_size); if (crash_base == 0) { /* Current arm64 boot protocol requires 2MB alignment */ - crash_base = memblock_find_in_range(0, arm64_dma32_phys_limit, - crash_size, SZ_2M); + crash_base = memblock_find_in_range(0, crash_max, crash_size, + SZ_2M); if (crash_base == 0) { pr_warn("cannot allocate crashkernel (size:0x%llx)\n", crash_size); -- 2.20.1
[PATCH v10 5/5] kdump: update Documentation about crashkernel on arm64
Now we support crashkernel=X,[low] on arm64, update the Documentation. We could use parameters "crashkernel=X crashkernel=Y,low" to reserve memory above 4G. Signed-off-by: Chen Zhou Tested-by: John Donnelly Tested-by: Prabhakar Kushwaha --- Documentation/admin-guide/kdump/kdump.rst | 14 -- Documentation/admin-guide/kernel-parameters.txt | 17 +++-- 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/Documentation/admin-guide/kdump/kdump.rst b/Documentation/admin-guide/kdump/kdump.rst index 2da65fef2a1c..e80fc9e28a9a 100644 --- a/Documentation/admin-guide/kdump/kdump.rst +++ b/Documentation/admin-guide/kdump/kdump.rst @@ -299,7 +299,15 @@ Boot into System Kernel "crashkernel=64M@16M" tells the system kernel to reserve 64 MB of memory starting at physical address 0x0100 (16MB) for the dump-capture kernel. - On x86 and x86_64, use "crashkernel=64M@16M". + On x86 use "crashkernel=64M@16M". + + On x86_64, use "crashkernel=Y" to select a region under 4G first, and + fall back to reserve region above 4G. + We can also use "crashkernel=X,high" to select a region above 4G, which + also tries to allocate at least 256M below 4G automatically and + "crashkernel=Y,low" can be used to allocate specified size low memory. + Use "crashkernel=Y@X" if we really have to reserve memory from specified + start address X. On ppc64, use "crashkernel=128M@32M". @@ -316,8 +324,10 @@ Boot into System Kernel kernel will automatically locate the crash kernel image within the first 512MB of RAM if X is not given. - On arm64, use "crashkernel=Y[@X]". Note that the start address of + On arm64, use "crashkernel=Y[@X]". Note that the start address of the kernel, X if explicitly specified, must be aligned to 2MiB (0x20). + If crashkernel=Z,low is specified simultaneously, reserve spcified size + low memory firstly and then reserve memory above 4G. 
Load the Dump-capture Kernel diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index fb95fad81c79..58a731eed011 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -722,6 +722,9 @@ [KNL, x86_64] select a region under 4G first, and fall back to reserve region above 4G when '@offset' hasn't been specified. + [KNL, arm64] If crashkernel=X,low is specified, reserve + spcified size low memory firstly, and then reserve memory + above 4G. See Documentation/admin-guide/kdump/kdump.rst for further details. crashkernel=range1:size1[,range2:size2,...][@offset] @@ -746,13 +749,23 @@ requires at least 64M+32K low memory, also enough extra low memory is needed to make sure DMA buffers for 32-bit devices won't run out. Kernel would try to allocate at - at least 256M below 4G automatically. + least 256M below 4G automatically. This one let user to specify own low range under 4G for second kernel instead. 0: to disable low allocation. It will be ignored when crashkernel=X,high is not used or memory reserved is below 4G. - + [KNL, arm64] range under 4G. + This one let user to specify own low range under 4G + for crash dump kernel instead. + Be different from x86_64, kernel reserves specified size + physical memory region only when this parameter is specified + instead of trying to reserve at least 256M below 4G + automatically. + Use this parameter along with crashkernel=X when we want + to reserve crashkernel above 4G. If there are devices + need to use ZONE_DMA in crash dump kernel, it is also + a good choice. cryptomgr.notests [KNL] Disable crypto self-tests -- 2.20.1
[PATCH v10 1/5] x86: kdump: move reserve_crashkernel_low() into crash_core.c
In preparation for supporting reserve_crashkernel_low in arm64 as x86_64 does, move reserve_crashkernel_low() into kernel/crash_core.c. BTW, move x86_64 CRASH_ALIGN to 2M suggested by Dave. CONFIG_PHYSICAL_ALIGN can be selected from 2M to 16M, move to the same as arm64. Note, in arm64, we reserve low memory if and only if crashkernel=X,low is specified. Different with x86_64, don't set low memory automatically. Reported-by: kbuild test robot Signed-off-by: Chen Zhou Tested-by: John Donnelly Tested-by: Prabhakar Kushwaha Acked-by: Dave Young --- arch/x86/kernel/setup.c| 66 - include/linux/crash_core.h | 3 ++ include/linux/kexec.h | 2 - kernel/crash_core.c| 85 ++ kernel/kexec_core.c| 17 5 files changed, 96 insertions(+), 77 deletions(-) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index a3767e74c758..33db99ae3035 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -401,8 +401,8 @@ static void __init memblock_x86_reserve_range_setup_data(void) #ifdef CONFIG_KEXEC_CORE -/* 16M alignment for crash kernel regions */ -#define CRASH_ALIGNSZ_16M +/* 2M alignment for crash kernel regions */ +#define CRASH_ALIGNSZ_2M /* * Keep the crash kernel below this limit. @@ -425,59 +425,6 @@ static void __init memblock_x86_reserve_range_setup_data(void) # define CRASH_ADDR_HIGH_MAX SZ_64T #endif -static int __init reserve_crashkernel_low(void) -{ -#ifdef CONFIG_X86_64 - unsigned long long base, low_base = 0, low_size = 0; - unsigned long total_low_mem; - int ret; - - total_low_mem = memblock_mem_size(1UL << (32 - PAGE_SHIFT)); - - /* crashkernel=Y,low */ - ret = parse_crashkernel_low(boot_command_line, total_low_mem, _size, ); - if (ret) { - /* -* two parts from kernel/dma/swiotlb.c: -* -swiotlb size: user-specified with swiotlb= or default. -* -* -swiotlb overflow buffer: now hardcoded to 32k. We round it -* to 8M for other buffers that may need to stay low too. 
Also -* make sure we allocate enough extra low memory so that we -* don't run out of DMA buffers for 32-bit devices. -*/ - low_size = max(swiotlb_size_or_default() + (8UL << 20), 256UL << 20); - } else { - /* passed with crashkernel=0,low ? */ - if (!low_size) - return 0; - } - - low_base = memblock_find_in_range(0, 1ULL << 32, low_size, CRASH_ALIGN); - if (!low_base) { - pr_err("Cannot reserve %ldMB crashkernel low memory, please try smaller size.\n", - (unsigned long)(low_size >> 20)); - return -ENOMEM; - } - - ret = memblock_reserve(low_base, low_size); - if (ret) { - pr_err("%s: Error reserving crashkernel low memblock.\n", __func__); - return ret; - } - - pr_info("Reserving %ldMB of low memory at %ldMB for crashkernel (System low RAM: %ldMB)\n", - (unsigned long)(low_size >> 20), - (unsigned long)(low_base >> 20), - (unsigned long)(total_low_mem >> 20)); - - crashk_low_res.start = low_base; - crashk_low_res.end = low_base + low_size - 1; - insert_resource(_resource, _low_res); -#endif - return 0; -} - static void __init reserve_crashkernel(void) { unsigned long long crash_size, crash_base, total_mem; @@ -541,9 +488,12 @@ static void __init reserve_crashkernel(void) return; } - if (crash_base >= (1ULL << 32) && reserve_crashkernel_low()) { - memblock_free(crash_base, crash_size); - return; + if (crash_base >= (1ULL << 32)) { + if (reserve_crashkernel_low()) { + memblock_free(crash_base, crash_size); + return; + } + insert_resource(_resource, _low_res); } pr_info("Reserving %ldMB of memory at %ldMB for crashkernel (System RAM: %ldMB)\n", diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h index 525510a9f965..4df8c0bff03e 100644 --- a/include/linux/crash_core.h +++ b/include/linux/crash_core.h @@ -63,6 +63,8 @@ phys_addr_t paddr_vmcoreinfo_note(void); extern unsigned char *vmcoreinfo_data; extern size_t vmcoreinfo_size; extern u32 *vmcoreinfo_note; +extern struct resource crashk_res; +extern struct resource crashk_low_res; Elf_Word 
*append_elf_note(Elf_Word *buf, char *name, unsigned int type, void *data, size_t data_len); @@ -74,5 +76,6 @@ int parse_crashkernel_high(char *cmdline, uns
[PATCH v10 0/5] support reserving crashkernel above 4G on arm64 kdump
This patch series enables reserving crashkernel above 4G on arm64. There are the following issues in arm64 kdump: 1. We use crashkernel=X to reserve crashkernel below 4G, which will fail when there is not enough low memory. 2. Currently, crashkernel=Y@X can be used to reserve crashkernel above 4G; in this case, if swiotlb or DMA buffers are required, the crash dump kernel will fail to boot because there is no low memory available for allocation. 3. Commit 1a8e1cef7603 ("arm64: use both ZONE_DMA and ZONE_DMA32") broke arm64 kdump. If the memory reserved for the crash dump kernel falls in ZONE_DMA32, devices in the crash dump kernel that need to use ZONE_DMA will fail to allocate memory. To solve these issues, introduce crashkernel=X,low to reserve the specified size of low memory. crashkernel=X tries to reserve memory for the crash dump kernel under 4G. If crashkernel=Y,low is specified simultaneously, reserve the specified size of low memory for crash dump kernel devices first and then reserve memory above 4G. When crashkernel is reserved above 4G in memory and crashkernel=X,low is specified simultaneously, the kernel should reserve the specified size of low memory for crash dump kernel devices. So there may be two crash kernel regions: one below 4G, the other above 4G. In order to distinguish it from the high region and not affect the use of kexec-tools, rename the low region as "Crash kernel (low)", and pass the low region by reusing the DT property "linux,usable-memory-range". We made the low memory region the last range of "linux,usable-memory-range" to keep compatibility with existing user-space and older kdump kernels. Besides, we need to modify kexec-tools: arm64: support more than one crash kernel regions(see [1]) Another update is the documentation of the DT property 'linux,usable-memory-range': schemas: update 'linux,usable-memory-range' node schema(see [2]) The previous changes and discussions can be retrieved from: Changes since [v9] - Patch 1 add Acked-by from Dave. 
- Update patch 5 according to Dave's comments. - Update chosen schema. Changes since [v8] - Reuse DT property "linux,usable-memory-range". Suggested by Rob, reuse DT property "linux,usable-memory-range" to pass the low memory region. - Fix kdump broken with ZONE_DMA reintroduced. - Update chosen schema. Changes since [v7] - Move x86 CRASH_ALIGN to 2M Suggested by Dave and do some test, move x86 CRASH_ALIGN to 2M. - Update Documentation/devicetree/bindings/chosen.txt. Add corresponding documentation to Documentation/devicetree/bindings/chosen.txt suggested by Arnd. - Add Tested-by from Jhon and pk. Changes since [v6] - Fix build errors reported by kbuild test robot. Changes since [v5] - Move reserve_crashkernel_low() into kernel/crash_core.c. - Delete crashkernel=X,high. - Modify crashkernel=X,low. If crashkernel=X,low is specified simultaneously, reserve spcified size low memory for crash kdump kernel devices firstly and then reserve memory above 4G. In addition, rename crashk_low_res as "Crash kernel (low)" for arm64, and then pass to crash dump kernel by DT property "linux,low-memory-range". - Update Documentation/admin-guide/kdump/kdump.rst. Changes since [v4] - Reimplement memblock_cap_memory_ranges for multiple ranges by Mike. Changes since [v3] - Add memblock_cap_memory_ranges back for multiple ranges. - Fix some compiling warnings. Changes since [v2] - Split patch "arm64: kdump: support reserving crashkernel above 4G" as two. Put "move reserve_crashkernel_low() into kexec_core.c" in a separate patch. Changes since [v1]: - Move common reserve_crashkernel_low() code into kernel/kexec_core.c. - Remove memblock_cap_memory_ranges() i added in v1 and implement that in fdt_enforce_memory_region(). There are at most two crash kernel regions, for two crash kernel regions case, we cap the memory range [min(regs[*].start), max(regs[*].end)] and then remove the memory range in the middle. 
[1]: http://lists.infradead.org/pipermail/kexec/2020-June/020737.html [2]: https://github.com/robherring/dt-schema/pull/19 [v1]: https://lkml.org/lkml/2019/4/2/1174 [v2]: https://lkml.org/lkml/2019/4/9/86 [v3]: https://lkml.org/lkml/2019/4/9/306 [v4]: https://lkml.org/lkml/2019/4/15/273 [v5]: https://lkml.org/lkml/2019/5/6/1360 [v6]: https://lkml.org/lkml/2019/8/30/142 [v7]: https://lkml.org/lkml/2019/12/23/411 [v8]: https://lkml.org/lkml/2020/5/21/213 [v9]: https://lkml.org/lkml/2020/6/28/73 Chen Zhou (5): x86: kdump: move reserve_crashkernel_low() into crash_core.c arm64: kdump: reserve crashkenel above 4G for crash dump kernel arm64: kdump: add memory for devices by DT property linux,usable-memory-range arm64: kdump: fix kdump broken with ZONE_DMA reintroduced kdump: update Documentation about crashkernel on arm64 Documentation/admin-guide/kdump/kdump.rst | 14 ++- .../admin-guide/kernel-parameters.txt | 17 +++- arch/arm64/kernel/setup.c
[PATCH v9 1/5] x86: kdump: move reserve_crashkernel_low() into crash_core.c
In preparation for supporting reserve_crashkernel_low in arm64 as x86_64 does, move reserve_crashkernel_low() into kernel/crash_core.c. BTW, move x86_64 CRASH_ALIGN to 2M suggested by Dave. CONFIG_PHYSICAL_ALIGN can be selected from 2M to 16M, move to the same as arm64. Note, in arm64, we reserve low memory if and only if crashkernel=X,low is specified. Different with x86_64, don't set low memory automatically. Reported-by: kbuild test robot Signed-off-by: Chen Zhou Tested-by: John Donnelly Tested-by: Prabhakar Kushwaha --- arch/x86/kernel/setup.c| 66 - include/linux/crash_core.h | 3 ++ include/linux/kexec.h | 2 - kernel/crash_core.c| 85 ++ kernel/kexec_core.c| 17 5 files changed, 96 insertions(+), 77 deletions(-) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index a3767e74c758..33db99ae3035 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -401,8 +401,8 @@ static void __init memblock_x86_reserve_range_setup_data(void) #ifdef CONFIG_KEXEC_CORE -/* 16M alignment for crash kernel regions */ -#define CRASH_ALIGNSZ_16M +/* 2M alignment for crash kernel regions */ +#define CRASH_ALIGNSZ_2M /* * Keep the crash kernel below this limit. @@ -425,59 +425,6 @@ static void __init memblock_x86_reserve_range_setup_data(void) # define CRASH_ADDR_HIGH_MAX SZ_64T #endif -static int __init reserve_crashkernel_low(void) -{ -#ifdef CONFIG_X86_64 - unsigned long long base, low_base = 0, low_size = 0; - unsigned long total_low_mem; - int ret; - - total_low_mem = memblock_mem_size(1UL << (32 - PAGE_SHIFT)); - - /* crashkernel=Y,low */ - ret = parse_crashkernel_low(boot_command_line, total_low_mem, _size, ); - if (ret) { - /* -* two parts from kernel/dma/swiotlb.c: -* -swiotlb size: user-specified with swiotlb= or default. -* -* -swiotlb overflow buffer: now hardcoded to 32k. We round it -* to 8M for other buffers that may need to stay low too. 
Also -* make sure we allocate enough extra low memory so that we -* don't run out of DMA buffers for 32-bit devices. -*/ - low_size = max(swiotlb_size_or_default() + (8UL << 20), 256UL << 20); - } else { - /* passed with crashkernel=0,low ? */ - if (!low_size) - return 0; - } - - low_base = memblock_find_in_range(0, 1ULL << 32, low_size, CRASH_ALIGN); - if (!low_base) { - pr_err("Cannot reserve %ldMB crashkernel low memory, please try smaller size.\n", - (unsigned long)(low_size >> 20)); - return -ENOMEM; - } - - ret = memblock_reserve(low_base, low_size); - if (ret) { - pr_err("%s: Error reserving crashkernel low memblock.\n", __func__); - return ret; - } - - pr_info("Reserving %ldMB of low memory at %ldMB for crashkernel (System low RAM: %ldMB)\n", - (unsigned long)(low_size >> 20), - (unsigned long)(low_base >> 20), - (unsigned long)(total_low_mem >> 20)); - - crashk_low_res.start = low_base; - crashk_low_res.end = low_base + low_size - 1; - insert_resource(_resource, _low_res); -#endif - return 0; -} - static void __init reserve_crashkernel(void) { unsigned long long crash_size, crash_base, total_mem; @@ -541,9 +488,12 @@ static void __init reserve_crashkernel(void) return; } - if (crash_base >= (1ULL << 32) && reserve_crashkernel_low()) { - memblock_free(crash_base, crash_size); - return; + if (crash_base >= (1ULL << 32)) { + if (reserve_crashkernel_low()) { + memblock_free(crash_base, crash_size); + return; + } + insert_resource(_resource, _low_res); } pr_info("Reserving %ldMB of memory at %ldMB for crashkernel (System RAM: %ldMB)\n", diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h index 525510a9f965..4df8c0bff03e 100644 --- a/include/linux/crash_core.h +++ b/include/linux/crash_core.h @@ -63,6 +63,8 @@ phys_addr_t paddr_vmcoreinfo_note(void); extern unsigned char *vmcoreinfo_data; extern size_t vmcoreinfo_size; extern u32 *vmcoreinfo_note; +extern struct resource crashk_res; +extern struct resource crashk_low_res; Elf_Word 
*append_elf_note(Elf_Word *buf, char *name, unsigned int type, void *data, size_t data_len); @@ -74,5 +76,6 @@ int parse_crashkernel_high(char *cmdline, unsigned long long system_ram
[PATCH v9 0/5] support reserving crashkernel above 4G on arm64 kdump
This patch series enables reserving crashkernel above 4G on arm64. There are the following issues in arm64 kdump: 1. We use crashkernel=X to reserve crashkernel below 4G, which will fail when there is not enough low memory. 2. Currently, crashkernel=Y@X can be used to reserve crashkernel above 4G; in this case, if swiotlb or DMA buffers are required, the crash dump kernel will fail to boot because there is no low memory available for allocation. 3. Commit 1a8e1cef7603 ("arm64: use both ZONE_DMA and ZONE_DMA32") broke arm64 kdump. If the memory reserved for the crash dump kernel falls in ZONE_DMA32, devices in the crash dump kernel that need to use ZONE_DMA will fail to allocate memory. To solve these issues, introduce crashkernel=X,low to reserve the specified size of low memory. crashkernel=X tries to reserve memory for the crash dump kernel under 4G. If crashkernel=Y,low is specified simultaneously, reserve the specified size of low memory for crash dump kernel devices first and then reserve memory above 4G. When crashkernel is reserved above 4G in memory and crashkernel=X,low is specified simultaneously, the kernel should reserve the specified size of low memory for crash dump kernel devices. So there may be two crash kernel regions: one below 4G, the other above 4G. In order to distinguish it from the high region and not affect the use of kexec-tools, rename the low region as "Crash kernel (low)", and pass the low region by reusing the DT property "linux,usable-memory-range". We made the low memory region the last range of "linux,usable-memory-range" to keep compatibility with existing user-space and older kdump kernels. Besides, we need to modify kexec-tools: arm64: support more than one crash kernel regions(see [1]) Another update is the documentation of the DT property 'linux,usable-memory-range': schemas: update 'linux,usable-memory-range' node schema(see [2]) The previous changes and discussions can be retrieved from: Changes since [v8] - Reuse DT property "linux,usable-memory-range". 
Suggested by Rob, reuse DT property "linux,usable-memory-range" to pass the low memory region. - Fix kdump broken with ZONE_DMA reintroduced. - Update chosen schema. Changes since [v7] - Move x86 CRASH_ALIGN to 2M Suggested by Dave and do some test, move x86 CRASH_ALIGN to 2M. - Update Documentation/devicetree/bindings/chosen.txt. Add corresponding documentation to Documentation/devicetree/bindings/chosen.txt suggested by Arnd. - Add Tested-by from Jhon and pk. Changes since [v6] - Fix build errors reported by kbuild test robot. Changes since [v5] - Move reserve_crashkernel_low() into kernel/crash_core.c. - Delete crashkernel=X,high. - Modify crashkernel=X,low. If crashkernel=X,low is specified simultaneously, reserve spcified size low memory for crash kdump kernel devices firstly and then reserve memory above 4G. In addition, rename crashk_low_res as "Crash kernel (low)" for arm64, and then pass to crash dump kernel by DT property "linux,low-memory-range". - Update Documentation/admin-guide/kdump/kdump.rst. Changes since [v4] - Reimplement memblock_cap_memory_ranges for multiple ranges by Mike. Changes since [v3] - Add memblock_cap_memory_ranges back for multiple ranges. - Fix some compiling warnings. Changes since [v2] - Split patch "arm64: kdump: support reserving crashkernel above 4G" as two. Put "move reserve_crashkernel_low() into kexec_core.c" in a separate patch. Changes since [v1]: - Move common reserve_crashkernel_low() code into kernel/kexec_core.c. - Remove memblock_cap_memory_ranges() i added in v1 and implement that in fdt_enforce_memory_region(). There are at most two crash kernel regions, for two crash kernel regions case, we cap the memory range [min(regs[*].start), max(regs[*].end)] and then remove the memory range in the middle. 
[1]: http://lists.infradead.org/pipermail/kexec/2020-June/020737.html [2]: https://github.com/robherring/dt-schema/pull/19 [v1]: https://lkml.org/lkml/2019/4/2/1174 [v2]: https://lkml.org/lkml/2019/4/9/86 [v3]: https://lkml.org/lkml/2019/4/9/306 [v4]: https://lkml.org/lkml/2019/4/15/273 [v5]: https://lkml.org/lkml/2019/5/6/1360 [v6]: https://lkml.org/lkml/2019/8/30/142 [v7]: https://lkml.org/lkml/2019/12/23/411 [v8]: https://lkml.org/lkml/2020/5/21/213 Chen Zhou (5): x86: kdump: move reserve_crashkernel_low() into crash_core.c arm64: kdump: reserve crashkenel above 4G for crash dump kernel arm64: kdump: add memory for devices by DT property linux,usable-memory-range arm64: kdump: fix kdump broken with ZONE_DMA reintroduced kdump: update Documentation about crashkernel on arm64 Documentation/admin-guide/kdump/kdump.rst | 13 ++- .../admin-guide/kernel-parameters.txt | 17 +++- arch/arm64/kernel/setup.c | 8 +- arch/arm64/mm/init.c | 74 --- arch/x86/kernel/setup.c | 66 ++ include/linux/
[PATCH v9 3/5] arm64: kdump: add memory for devices by DT property linux,usable-memory-range
If we want to reserve crashkernel above 4G, we can use the parameters "crashkernel=X crashkernel=Y,low". In this case, the specified size of low memory is reserved for crash dump kernel devices and never mapped by the first kernel. This memory range is advertised to the crash dump kernel via a DT property under /chosen, linux,usable-memory-range = <BASE1 SIZE1 [BASE2 SIZE2]>. We reused the DT property linux,usable-memory-range and made the low memory region the second range "BASE2 SIZE2", which keeps compatibility with existing user-space and older kdump kernels. The crash dump kernel reads this property at boot time and calls memblock_add() to add the low memory region after memblock_cap_memory_range() has been called. Signed-off-by: Chen Zhou Tested-by: John Donnelly Tested-by: Prabhakar Kushwaha --- arch/arm64/mm/init.c | 43 +-- 1 file changed, 33 insertions(+), 10 deletions(-) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index ce7ced85f5fb..f5b31e8f1f34 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -69,6 +69,15 @@ EXPORT_SYMBOL(vmemmap); phys_addr_t arm64_dma_phys_limit __ro_after_init; static phys_addr_t arm64_dma32_phys_limit __ro_after_init; +/* + * The main usage of linux,usable-memory-range is for crash dump kernel. + * Originally, the number of usable-memory regions is one. Now crash dump + * kernel support at most two regions, low region and high region. + * To make compatibility with existing user-space and older kdump, the low + * region is always the last range of linux,usable-memory-range if exist. 
+ */ +#define MAX_USABLE_RANGES 2 + #ifdef CONFIG_KEXEC_CORE /* * reserve_crashkernel() - reserves memory for crash kernel @@ -272,9 +281,9 @@ early_param("mem", early_mem); static int __init early_init_dt_scan_usablemem(unsigned long node, const char *uname, int depth, void *data) { - struct memblock_region *usablemem = data; - const __be32 *reg; - int len; + struct memblock_region *usable_rgns = data; + const __be32 *reg, *endp; + int len, nr = 0; if (depth != 1 || strcmp(uname, "chosen") != 0) return 0; @@ -283,22 +292,36 @@ static int __init early_init_dt_scan_usablemem(unsigned long node, if (!reg || (len < (dt_root_addr_cells + dt_root_size_cells))) return 1; - usablemem->base = dt_mem_next_cell(dt_root_addr_cells, ); - usablemem->size = dt_mem_next_cell(dt_root_size_cells, ); + endp = reg + (len / sizeof(__be32)); + while ((endp - reg) >= (dt_root_addr_cells + dt_root_size_cells)) { + usable_rgns[nr].base = dt_mem_next_cell(dt_root_addr_cells, ); + usable_rgns[nr].size = dt_mem_next_cell(dt_root_size_cells, ); + + if (++nr >= MAX_USABLE_RANGES) + break; + } return 1; } static void __init fdt_enforce_memory_region(void) { - struct memblock_region reg = { - .size = 0, + struct memblock_region usable_rgns[MAX_USABLE_RANGES] = { + { .size = 0 }, + { .size = 0 } }; - of_scan_flat_dt(early_init_dt_scan_usablemem, ); + of_scan_flat_dt(early_init_dt_scan_usablemem, _rgns); - if (reg.size) - memblock_cap_memory_range(reg.base, reg.size); + /* +* The first range of usable-memory regions is for crash dump +* kernel with only one region or for high region with two regions, +* the second range is dedicated for low region if exist. +*/ + if (usable_rgns[0].size) + memblock_cap_memory_range(usable_rgns[0].base, usable_rgns[0].size); + if (usable_rgns[1].size) + memblock_add(usable_rgns[1].base, usable_rgns[1].size); } void __init arm64_memblock_init(void) -- 2.20.1
[PATCH v9 2/5] arm64: kdump: reserve crashkernel above 4G for crash dump kernel
crashkernel=X tries to reserve memory for the crash dump kernel under 4G. If crashkernel=X,low is specified simultaneously, reserve the specified size of low memory for crash dump kernel devices first and then reserve memory above 4G. As suggested by James, just introduce crashkernel=X,low to arm64. As mentioned above, if crashkernel=X,low is specified simultaneously, reserve the specified size of low memory for crash dump kernel devices first and then reserve memory above 4G, which is much simpler. Signed-off-by: Chen Zhou Tested-by: John Donnelly Tested-by: Prabhakar Kushwaha --- arch/arm64/kernel/setup.c | 8 +++- arch/arm64/mm/init.c | 31 +-- 2 files changed, 36 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 93b3844cf442..4dc51a2ac012 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -238,7 +238,13 @@ static void __init request_standard_resources(void) kernel_data.end <= res->end) request_resource(res, _data); #ifdef CONFIG_KEXEC_CORE - /* Userspace will find "Crash kernel" region in /proc/iomem. */ + /* +* Userspace will find "Crash kernel" region in /proc/iomem. +* Note: the low region is renamed as Crash kernel (low). 
+*/ + if (crashk_low_res.end && crashk_low_res.start >= res->start && + crashk_low_res.end <= res->end) + request_resource(res, _low_res); if (crashk_res.end && crashk_res.start >= res->start && crashk_res.end <= res->end) request_resource(res, _res); diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 1e93cfc7c47a..ce7ced85f5fb 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -81,6 +81,7 @@ static void __init reserve_crashkernel(void) { unsigned long long crash_base, crash_size; int ret; + phys_addr_t crash_max = arm64_dma32_phys_limit; ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), _size, _base); @@ -88,12 +89,38 @@ static void __init reserve_crashkernel(void) if (ret || !crash_size) return; + ret = reserve_crashkernel_low(); + if (!ret && crashk_low_res.end) { + /* +* If crashkernel=X,low specified, there may be two regions, +* we need to make some changes as follows: +* +* 1. rename the low region as "Crash kernel (low)" +* In order to distinct from the high region and make no effect +* to the use of existing kexec-tools, rename the low region as +* "Crash kernel (low)". +* +* 2. change the upper bound for crash memory +* Set MEMBLOCK_ALLOC_ACCESSIBLE upper bound for crash memory. +* +* 3. mark the low region as "nomap" +* The low region is intended to be used for crash dump kernel +* devices, just mark the low region as "nomap" simply. +*/ + const char *rename = "Crash kernel (low)"; + + crashk_low_res.name = rename; + crash_max = MEMBLOCK_ALLOC_ACCESSIBLE; + memblock_mark_nomap(crashk_low_res.start, + resource_size(_low_res)); + } + crash_size = PAGE_ALIGN(crash_size); if (crash_base == 0) { /* Current arm64 boot protocol requires 2MB alignment */ - crash_base = memblock_find_in_range(0, arm64_dma32_phys_limit, - crash_size, SZ_2M); + crash_base = memblock_find_in_range(0, crash_max, crash_size, + SZ_2M); if (crash_base == 0) { pr_warn("cannot allocate crashkernel (size:0x%llx)\n", crash_size); -- 2.20.1
[PATCH v9 5/5] kdump: update Documentation about crashkernel on arm64
Now we support crashkernel=X,[low] on arm64, update the Documentation. We could use parameters "crashkernel=X crashkernel=Y,low" to reserve memory above 4G. Signed-off-by: Chen Zhou Tested-by: John Donnelly Tested-by: Prabhakar Kushwaha --- Documentation/admin-guide/kdump/kdump.rst | 13 +++-- Documentation/admin-guide/kernel-parameters.txt | 17 +++-- 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/Documentation/admin-guide/kdump/kdump.rst b/Documentation/admin-guide/kdump/kdump.rst index 2da65fef2a1c..6ba294d425c9 100644 --- a/Documentation/admin-guide/kdump/kdump.rst +++ b/Documentation/admin-guide/kdump/kdump.rst @@ -299,7 +299,13 @@ Boot into System Kernel "crashkernel=64M@16M" tells the system kernel to reserve 64 MB of memory starting at physical address 0x0100 (16MB) for the dump-capture kernel. - On x86 and x86_64, use "crashkernel=64M@16M". + On x86 use "crashkernel=64M@16M". + + On x86_64, use "crashkernel=Y[@X]" to select a region under 4G first, and + fall back to reserve region above 4G when '@offset' hasn't been specified. + We can also use "crashkernel=X,high" to select a region above 4G, which + also tries to allocate at least 256M below 4G automatically and + "crashkernel=Y,low" can be used to allocate specified size low memory. On ppc64, use "crashkernel=128M@32M". @@ -316,8 +322,11 @@ Boot into System Kernel kernel will automatically locate the crash kernel image within the first 512MB of RAM if X is not given. - On arm64, use "crashkernel=Y[@X]". Note that the start address of + On arm64, use "crashkernel=Y[@X]". Note that the start address of the kernel, X if explicitly specified, must be aligned to 2MiB (0x20). + If crashkernel=Z,low is specified simultaneously, reserve spcified size + low memory for crash kdump kernel devices firstly and then reserve memory + above 4G. 
Load the Dump-capture Kernel diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index fb95fad81c79..335431a351c0 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -722,6 +722,9 @@ [KNL, x86_64] select a region under 4G first, and fall back to reserve region above 4G when '@offset' hasn't been specified. + [KNL, arm64] If crashkernel=X,low is specified, reserve + spcified size low memory for crash kdump kernel devices + firstly, and then reserve memory above 4G. See Documentation/admin-guide/kdump/kdump.rst for further details. crashkernel=range1:size1[,range2:size2,...][@offset] @@ -746,13 +749,23 @@ requires at least 64M+32K low memory, also enough extra low memory is needed to make sure DMA buffers for 32-bit devices won't run out. Kernel would try to allocate at - at least 256M below 4G automatically. + least 256M below 4G automatically. This one let user to specify own low range under 4G for second kernel instead. 0: to disable low allocation. It will be ignored when crashkernel=X,high is not used or memory reserved is below 4G. - + [KNL, arm64] range under 4G. + This one let user to specify own low range under 4G + for crash dump kernel instead. + Different with x86_64, kernel allocates specified size + physical memory region only when this parameter is specified + instead of trying to allocate at least 256M below 4G + automatically. + This parameter is used along with crashkernel=X when we + want to reserve crashkernel above 4G. If there are devices + need to use ZONE_DMA in crash dump kernel, it is also + a good choice. cryptomgr.notests [KNL] Disable crypto self-tests -- 2.20.1
[PATCH v9 4/5] arm64: kdump: fix kdump broken with ZONE_DMA reintroduced
Commit 1a8e1cef7603 ("arm64: use both ZONE_DMA and ZONE_DMA32") broke arm64 kdump. If the memory reserved for the crash dump kernel falls in ZONE_DMA32, devices in the crash dump kernel that need to use ZONE_DMA will fail to allocate memory. This patch addresses the above issue on top of "reserving crashkernel above 4G". Originally, we reserved low memory below 4G; now we just need to adjust the memory limit to arm64_dma_phys_limit in reserve_crashkernel_low() if ZONE_DMA is enabled. That is, if there are devices that need to use ZONE_DMA in the crash dump kernel, it is a good choice to use the parameters "crashkernel=X crashkernel=Y,low". Signed-off-by: Chen Zhou --- kernel/crash_core.c | 7 ++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/kernel/crash_core.c b/kernel/crash_core.c index a7580d291c37..e8ecbbc761a3 100644 --- a/kernel/crash_core.c +++ b/kernel/crash_core.c @@ -320,6 +320,7 @@ int __init reserve_crashkernel_low(void) unsigned long long base, low_base = 0, low_size = 0; unsigned long total_low_mem; int ret; + phys_addr_t crash_max = 1ULL << 32; total_low_mem = memblock_mem_size(1UL << (32 - PAGE_SHIFT)); @@ -352,7 +353,11 @@ int __init reserve_crashkernel_low(void) return 0; } - low_base = memblock_find_in_range(0, 1ULL << 32, low_size, CRASH_ALIGN); +#ifdef CONFIG_ARM64 + if (IS_ENABLED(CONFIG_ZONE_DMA)) + crash_max = arm64_dma_phys_limit; +#endif + low_base = memblock_find_in_range(0, crash_max, low_size, CRASH_ALIGN); if (!low_base) { pr_err("Cannot reserve %ldMB crashkernel low memory, please try smaller size.\n", (unsigned long)(low_size >> 20)); -- 2.20.1
[PATCH] docs/zh_CN: update sysfs.txt about show() usage
Update the show() usage according to the English version. Signed-off-by: Chen Zhou --- Documentation/translations/zh_CN/filesystems/sysfs.txt | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Documentation/translations/zh_CN/filesystems/sysfs.txt b/Documentation/translations/zh_CN/filesystems/sysfs.txt index fcf620049d11..9481e3ed2a06 100644 --- a/Documentation/translations/zh_CN/filesystems/sysfs.txt +++ b/Documentation/translations/zh_CN/filesystems/sysfs.txt @@ -213,10 +213,12 @@ Sysfs 将会为每次读写操作调用一次这个方法。这使得这些方 - 缓冲区应总是 PAGE_SIZE 大小。对于i386,这个值为4096。 -- show() 方法应该返回写入缓冲区的字节数,也就是 snprintf()的 +- show() 方法应该返回写入缓冲区的字节数,也就是 scnprintf()的 返回值。 -- show() 应始终使用 snprintf()。 +- show() 方法在将格式化返回值返回用户空间的时候,禁止使用snprintf()。 + 如果可以保证不会发生缓冲区溢出,可以使用sprintf(),否则必须使用 + scnprintf()。 - store() 应返回缓冲区的已用字节数。如果整个缓存都已填满,只需返回 count 参数。 -- 2.20.1
[PATCH v8 5/5] dt-bindings: chosen: Document linux,low-memory-range for arm64 kdump
Add documentation for DT property used by arm64 kdump: linux,low-memory-range. "linux,low-memory-range" is an another memory region used for crash dump kernel devices. Signed-off-by: Chen Zhou --- Documentation/devicetree/bindings/chosen.txt | 25 1 file changed, 25 insertions(+) diff --git a/Documentation/devicetree/bindings/chosen.txt b/Documentation/devicetree/bindings/chosen.txt index 45e79172a646..bfe6fb6976e6 100644 --- a/Documentation/devicetree/bindings/chosen.txt +++ b/Documentation/devicetree/bindings/chosen.txt @@ -103,6 +103,31 @@ While this property does not represent a real hardware, the address and the size are expressed in #address-cells and #size-cells, respectively, of the root node. +linux,low-memory-range +-- +This property (arm64 only) holds a base address and size, describing a +limited region below 4G. Similar to "linux,usable-memory-range", it is +an another memory range which may be considered available for use by the +kernel. + +e.g. + +/ { + chosen { + linux,low-memory-range = <0x0 0x7000 0x0 0x1000>; + linux,usable-memory-range = <0x202f 0xc000 0x0 0x4000>; + }; +}; + +The main usage is for crash dump kernel devices when reserving crashkernel +above 4G. When reserving crashkernel above 4G, there may be two crash kernel +regions, one is below 4G, the other is above 4G. In order to distinct from +the high region, use this property to pass the low region. + +While this property does not represent a real hardware, the address +and the size are expressed in #address-cells and #size-cells, +respectively, of the root node. + linux,elfcorehdr -- 2.20.1
[PATCH v8 3/5] arm64: kdump: add memory for devices by DT property, low-memory-range
If we want to reserve crashkernel above 4G, we could use parameters "crashkernel=X crashkernel=Y,low", in this case, specified size low memory is reserved for crash dump kernel devices and never mapped by the first kernel. This memory range is advertised to crash dump kernel via DT property under /chosen, linux,low-memory-range= Crash dump kernel reads this property at boot time and call memblock_add() after memblock_cap_memory_range() has been called. Signed-off-by: Chen Zhou Tested-by: John Donnelly Tested-by: Prabhakar Kushwaha --- arch/arm64/mm/init.c | 30 +- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 71498acf0cd8..fcc3abee7003 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -323,6 +323,26 @@ static int __init early_mem(char *p) } early_param("mem", early_mem); +static int __init early_init_dt_scan_lowmem(unsigned long node, + const char *uname, int depth, void *data) +{ + struct memblock_region *lowmem = data; + const __be32 *reg; + int len; + + if (depth != 1 || strcmp(uname, "chosen") != 0) + return 0; + + reg = of_get_flat_dt_prop(node, "linux,low-memory-range", ); + if (!reg || (len < (dt_root_addr_cells + dt_root_size_cells))) + return 1; + + lowmem->base = dt_mem_next_cell(dt_root_addr_cells, ); + lowmem->size = dt_mem_next_cell(dt_root_size_cells, ); + + return 1; +} + static int __init early_init_dt_scan_usablemem(unsigned long node, const char *uname, int depth, void *data) { @@ -353,13 +373,21 @@ static void __init fdt_enforce_memory_region(void) if (reg.size) memblock_cap_memory_range(reg.base, reg.size); + + of_scan_flat_dt(early_init_dt_scan_lowmem, ); + + if (reg.size) + memblock_add(reg.base, reg.size); } void __init arm64_memblock_init(void) { const s64 linear_region_size = BIT(vabits_actual - 1); - /* Handle linux,usable-memory-range property */ + /* +* Handle linux,usable-memory-range and linux,low-memory-range +* properties. 
+*/ fdt_enforce_memory_region(); /* Remove memory above our supported physical address size */ -- 2.20.1
[PATCH v8 0/5] support reserving crashkernel above 4G on arm64 kdump
This patch series enable reserving crashkernel above 4G in arm64. There are following issues in arm64 kdump: 1. We use crashkernel=X to reserve crashkernel below 4G, which will fail when there is no enough low memory. 2. Currently, crashkernel=Y@X can be used to reserve crashkernel above 4G, in this case, if swiotlb or DMA buffers are required, crash dump kernel will boot failure because there is no low memory available for allocation. To solve these issues, introduce crashkernel=X,low to reserve specified size low memory. Crashkernel=X tries to reserve memory for the crash dump kernel under 4G. If crashkernel=Y,low is specified simultaneously, reserve spcified size low memory for crash kdump kernel devices firstly and then reserve memory above 4G. When crashkernel is reserved above 4G in memory, that is, crashkernel=X,low is specified simultaneously, kernel should reserve specified size low memory for crash dump kernel devices. So there may be two crash kernel regions, one is below 4G, the other is above 4G. In order to distinct from the high region and make no effect to the use of kexec-tools, rename the low region as "Crash kernel (low)", and add DT property "linux,low-memory-range" to crash dump kernel's dtb to pass the low region. Besides, we need to modify kexec-tools: arm64: kdump: add another DT property to crash dump kernel's dtb(see [1]) The previous changes and discussions can be retrieved from: Changes since [v7] - Move x86 CRASH_ALIGN to 2M Suggested by Dave and do some test, move x86 CRASH_ALIGN to 2M. - Update Documentation/devicetree/bindings/chosen.txt Add corresponding documentation to Documentation/devicetree/bindings/chosen.txt suggested by Arnd. - Add Tested-by from Jhon and pk Changes since [v6] - Fix build errors reported by kbuild test robot. Changes since [v5] - Move reserve_crashkernel_low() into kernel/crash_core.c. - Delete crashkernel=X,high. - Modify crashkernel=X,low. 
If crashkernel=X,low is specified simultaneously, reserve spcified size low memory for crash kdump kernel devices firstly and then reserve memory above 4G. In addition, rename crashk_low_res as "Crash kernel (low)" for arm64, and then pass to crash dump kernel by DT property "linux,low-memory-range". - Update Documentation/admin-guide/kdump/kdump.rst. Changes since [v4] - Reimplement memblock_cap_memory_ranges for multiple ranges by Mike. Changes since [v3] - Add memblock_cap_memory_ranges back for multiple ranges. - Fix some compiling warnings. Changes since [v2] - Split patch "arm64: kdump: support reserving crashkernel above 4G" as two. Put "move reserve_crashkernel_low() into kexec_core.c" in a separate patch. Changes since [v1]: - Move common reserve_crashkernel_low() code into kernel/kexec_core.c. - Remove memblock_cap_memory_ranges() i added in v1 and implement that in fdt_enforce_memory_region(). There are at most two crash kernel regions, for two crash kernel regions case, we cap the memory range [min(regs[*].start), max(regs[*].end)] and then remove the memory range in the middle. 
[1]: http://lists.infradead.org/pipermail/kexec/2020-May/025128.html [v1]: https://lkml.org/lkml/2019/4/2/1174 [v2]: https://lkml.org/lkml/2019/4/9/86 [v3]: https://lkml.org/lkml/2019/4/9/306 [v4]: https://lkml.org/lkml/2019/4/15/273 [v5]: https://lkml.org/lkml/2019/5/6/1360 [v6]: https://lkml.org/lkml/2019/8/30/142 [v7]: https://lkml.org/lkml/2019/12/23/411 Chen Zhou (5): x86: kdump: move reserve_crashkernel_low() into crash_core.c arm64: kdump: reserve crashkenel above 4G for crash dump kernel arm64: kdump: add memory for devices by DT property, low-memory-range kdump: update Documentation about crashkernel on arm64 dt-bindings: chosen: Document linux,low-memory-range for arm64 kdump Documentation/admin-guide/kdump/kdump.rst | 13 ++- .../admin-guide/kernel-parameters.txt | 12 ++- Documentation/devicetree/bindings/chosen.txt | 25 ++ arch/arm64/kernel/setup.c | 8 +- arch/arm64/mm/init.c | 61 - arch/x86/kernel/setup.c | 66 ++ include/linux/crash_core.h| 3 + include/linux/kexec.h | 2 - kernel/crash_core.c | 85 +++ kernel/kexec_core.c | 17 10 files changed, 208 insertions(+), 84 deletions(-) -- 2.20.1
[PATCH v8 4/5] kdump: update Documentation about crashkernel on arm64
Now we support crashkernel=X,[low] on arm64, update the Documentation. Signed-off-by: Chen Zhou Tested-by: John Donnelly Tested-by: Prabhakar Kushwaha --- Documentation/admin-guide/kdump/kdump.rst | 13 +++-- Documentation/admin-guide/kernel-parameters.txt | 12 +++- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/Documentation/admin-guide/kdump/kdump.rst b/Documentation/admin-guide/kdump/kdump.rst index ac7e131d2935..e55173ec1666 100644 --- a/Documentation/admin-guide/kdump/kdump.rst +++ b/Documentation/admin-guide/kdump/kdump.rst @@ -299,7 +299,13 @@ Boot into System Kernel "crashkernel=64M@16M" tells the system kernel to reserve 64 MB of memory starting at physical address 0x0100 (16MB) for the dump-capture kernel. - On x86 and x86_64, use "crashkernel=64M@16M". + On x86 use "crashkernel=64M@16M". + + On x86_64, use "crashkernel=Y[@X]" to select a region under 4G first, and + fall back to reserve region above 4G when '@offset' hasn't been specified. + We can also use "crashkernel=X,high" to select a region above 4G, which + also tries to allocate at least 256M below 4G automatically and + "crashkernel=Y,low" can be used to allocate specified size low memory. On ppc64, use "crashkernel=128M@32M". @@ -316,8 +322,11 @@ Boot into System Kernel kernel will automatically locate the crash kernel image within the first 512MB of RAM if X is not given. - On arm64, use "crashkernel=Y[@X]". Note that the start address of + On arm64, use "crashkernel=Y[@X]". Note that the start address of the kernel, X if explicitly specified, must be aligned to 2MiB (0x20). + If crashkernel=Z,low is specified simultaneously, reserve spcified size + low memory for crash kdump kernel devices firstly and then reserve memory + above 4G. 
Load the Dump-capture Kernel diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 7bc83f3d9bdf..97695783b817 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -722,6 +722,9 @@ [KNL, x86_64] select a region under 4G first, and fall back to reserve region above 4G when '@offset' hasn't been specified. + [KNL, arm64] If crashkernel=X,low is specified, reserve + spcified size low memory for crash kdump kernel devices + firstly, and then reserve memory above 4G. See Documentation/admin-guide/kdump/kdump.rst for further details. crashkernel=range1:size1[,range2:size2,...][@offset] @@ -746,12 +749,19 @@ requires at least 64M+32K low memory, also enough extra low memory is needed to make sure DMA buffers for 32-bit devices won't run out. Kernel would try to allocate at - at least 256M below 4G automatically. + least 256M below 4G automatically. This one let user to specify own low range under 4G for second kernel instead. 0: to disable low allocation. It will be ignored when crashkernel=X,high is not used or memory reserved is below 4G. + [KNL, arm64] range under 4G. + This one let user to specify own low range under 4G + for crash dump kernel instead. + Different with x86_64, kernel allocates specified size + physical memory region only when this parameter is specified + instead of trying to allocate at least 256M below 4G + automatically. cryptomgr.notests [KNL] Disable crypto self-tests -- 2.20.1
[PATCH v8 1/5] x86: kdump: move reserve_crashkernel_low() into crash_core.c
In preparation for supporting reserve_crashkernel_low in arm64 as x86_64 does, move reserve_crashkernel_low() into kernel/crash_core.c. BTW, move x86 CRASH_ALIGN to 2M. Note, in arm64, we reserve low memory if and only if crashkernel=X,low is specified. Different with x86_64, don't set low memory automatically. Reported-by: kbuild test robot Signed-off-by: Chen Zhou Tested-by: John Donnelly Tested-by: Prabhakar Kushwaha --- arch/x86/kernel/setup.c| 66 - include/linux/crash_core.h | 3 ++ include/linux/kexec.h | 2 - kernel/crash_core.c| 85 ++ kernel/kexec_core.c| 17 5 files changed, 96 insertions(+), 77 deletions(-) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 4b3fa6cd3106..de75fec73d47 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -395,8 +395,8 @@ static void __init memblock_x86_reserve_range_setup_data(void) #ifdef CONFIG_KEXEC_CORE -/* 16M alignment for crash kernel regions */ -#define CRASH_ALIGNSZ_16M +/* 2M alignment for crash kernel regions */ +#define CRASH_ALIGNSZ_2M /* * Keep the crash kernel below this limit. @@ -419,59 +419,6 @@ static void __init memblock_x86_reserve_range_setup_data(void) # define CRASH_ADDR_HIGH_MAX SZ_64T #endif -static int __init reserve_crashkernel_low(void) -{ -#ifdef CONFIG_X86_64 - unsigned long long base, low_base = 0, low_size = 0; - unsigned long total_low_mem; - int ret; - - total_low_mem = memblock_mem_size(1UL << (32 - PAGE_SHIFT)); - - /* crashkernel=Y,low */ - ret = parse_crashkernel_low(boot_command_line, total_low_mem, _size, ); - if (ret) { - /* -* two parts from kernel/dma/swiotlb.c: -* -swiotlb size: user-specified with swiotlb= or default. -* -* -swiotlb overflow buffer: now hardcoded to 32k. We round it -* to 8M for other buffers that may need to stay low too. Also -* make sure we allocate enough extra low memory so that we -* don't run out of DMA buffers for 32-bit devices. 
-*/ - low_size = max(swiotlb_size_or_default() + (8UL << 20), 256UL << 20); - } else { - /* passed with crashkernel=0,low ? */ - if (!low_size) - return 0; - } - - low_base = memblock_find_in_range(0, 1ULL << 32, low_size, CRASH_ALIGN); - if (!low_base) { - pr_err("Cannot reserve %ldMB crashkernel low memory, please try smaller size.\n", - (unsigned long)(low_size >> 20)); - return -ENOMEM; - } - - ret = memblock_reserve(low_base, low_size); - if (ret) { - pr_err("%s: Error reserving crashkernel low memblock.\n", __func__); - return ret; - } - - pr_info("Reserving %ldMB of low memory at %ldMB for crashkernel (System low RAM: %ldMB)\n", - (unsigned long)(low_size >> 20), - (unsigned long)(low_base >> 20), - (unsigned long)(total_low_mem >> 20)); - - crashk_low_res.start = low_base; - crashk_low_res.end = low_base + low_size - 1; - insert_resource(_resource, _low_res); -#endif - return 0; -} - static void __init reserve_crashkernel(void) { unsigned long long crash_size, crash_base, total_mem; @@ -535,9 +482,12 @@ static void __init reserve_crashkernel(void) return; } - if (crash_base >= (1ULL << 32) && reserve_crashkernel_low()) { - memblock_free(crash_base, crash_size); - return; + if (crash_base >= (1ULL << 32)) { + if (reserve_crashkernel_low()) { + memblock_free(crash_base, crash_size); + return; + } + insert_resource(_resource, _low_res); } pr_info("Reserving %ldMB of memory at %ldMB for crashkernel (System RAM: %ldMB)\n", diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h index 525510a9f965..4df8c0bff03e 100644 --- a/include/linux/crash_core.h +++ b/include/linux/crash_core.h @@ -63,6 +63,8 @@ phys_addr_t paddr_vmcoreinfo_note(void); extern unsigned char *vmcoreinfo_data; extern size_t vmcoreinfo_size; extern u32 *vmcoreinfo_note; +extern struct resource crashk_res; +extern struct resource crashk_low_res; Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type, void *data, size_t data_len); @@ -74,5 +76,6 @@ int 
parse_crashkernel_high(char *cmdline, unsigned long long system_ram, unsigned long long *crash_size, unsigned long long *crash_base); int parse_crashkernel_lo
[PATCH v8 2/5] arm64: kdump: reserve crashkernel above 4G for crash dump kernel
Crashkernel=X tries to reserve memory for the crash dump kernel under 4G. If crashkernel=X,low is specified simultaneously, first reserve the specified amount of low memory for crash dump kernel devices, and then reserve memory above 4G. Signed-off-by: Chen Zhou Tested-by: John Donnelly Tested-by: Prabhakar Kushwaha --- arch/arm64/kernel/setup.c | 8 +++- arch/arm64/mm/init.c | 31 +-- 2 files changed, 36 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 3fd2c11c09fc..a8487e4d3e5a 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -238,7 +238,13 @@ static void __init request_standard_resources(void) kernel_data.end <= res->end) request_resource(res, _data); #ifdef CONFIG_KEXEC_CORE - /* Userspace will find "Crash kernel" region in /proc/iomem. */ + /* +* Userspace will find "Crash kernel" region in /proc/iomem. +* Note: the low region is renamed as Crash kernel (low). +*/ + if (crashk_low_res.end && crashk_low_res.start >= res->start && + crashk_low_res.end <= res->end) + request_resource(res, _low_res); if (crashk_res.end && crashk_res.start >= res->start && crashk_res.end <= res->end) request_resource(res, _res); diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index e42727e3568e..71498acf0cd8 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -81,6 +81,7 @@ static void __init reserve_crashkernel(void) { unsigned long long crash_base, crash_size; int ret; + phys_addr_t crash_max = arm64_dma32_phys_limit; ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), _size, _base); @@ -88,12 +89,38 @@ static void __init reserve_crashkernel(void) if (ret || !crash_size) return; + ret = reserve_crashkernel_low(); + if (!ret && crashk_low_res.end) { + /* +* If crashkernel=X,low specified, there may be two regions, +* we need to make some changes as follows: +* +* 1. 
rename the low region as "Crash kernel (low)" +* In order to distinct from the high region and make no effect +* to the use of existing kexec-tools, rename the low region as +* "Crash kernel (low)". +* +* 2. change the upper bound for crash memory +* Set MEMBLOCK_ALLOC_ACCESSIBLE upper bound for crash memory. +* +* 3. mark the low region as "nomap" +* The low region is intended to be used for crash dump kernel +* devices, just mark the low region as "nomap" simply. +*/ + const char *rename = "Crash kernel (low)"; + + crashk_low_res.name = rename; + crash_max = MEMBLOCK_ALLOC_ACCESSIBLE; + memblock_mark_nomap(crashk_low_res.start, + resource_size(_low_res)); + } + crash_size = PAGE_ALIGN(crash_size); if (crash_base == 0) { /* Current arm64 boot protocol requires 2MB alignment */ - crash_base = memblock_find_in_range(0, arm64_dma32_phys_limit, - crash_size, SZ_2M); + crash_base = memblock_find_in_range(0, crash_max, crash_size, + SZ_2M); if (crash_base == 0) { pr_warn("cannot allocate crashkernel (size:0x%llx)\n", crash_size); -- 2.20.1
[PATCH -next 1/2] sparc: use scnprintf() in show_pciobppath_attr() in pci.c
snprintf() returns the number of bytes that would be written, which may be greater than the actual length to be written. show_pciobppath_attr() should return the number of bytes printed into the buffer. This is the return value of scnprintf(). Signed-off-by: Chen Zhou --- arch/sparc/kernel/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c index 5ed43828e078..a41ad562ed4e 100644 --- a/arch/sparc/kernel/pci.c +++ b/arch/sparc/kernel/pci.c @@ -593,7 +593,7 @@ show_pciobppath_attr(struct device * dev, struct device_attribute * attr, char * pdev = to_pci_dev(dev); dp = pdev->dev.of_node; - return snprintf (buf, PAGE_SIZE, "%pOF\n", dp); + return scnprintf(buf, PAGE_SIZE, "%pOF\n", dp); } static DEVICE_ATTR(obppath, S_IRUSR | S_IRGRP | S_IROTH, show_pciobppath_attr, NULL); -- 2.20.1
[PATCH -next 2/2] sparc: use scnprintf() in show_pciobppath_attr() in vio.c
snprintf() returns the number of bytes that would be written, which may be greater than the actual length to be written. show_pciobppath_attr() should return the number of bytes printed into the buffer. This is the return value of scnprintf(). Signed-off-by: Chen Zhou --- arch/sparc/kernel/vio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sparc/kernel/vio.c b/arch/sparc/kernel/vio.c index c7cad9b7bba7..4f57056ed463 100644 --- a/arch/sparc/kernel/vio.c +++ b/arch/sparc/kernel/vio.c @@ -193,7 +193,7 @@ show_pciobppath_attr(struct device *dev, struct device_attribute *attr, vdev = to_vio_dev(dev); dp = vdev->dp; - return snprintf (buf, PAGE_SIZE, "%pOF\n", dp); + return scnprintf(buf, PAGE_SIZE, "%pOF\n", dp); } static DEVICE_ATTR(obppath, S_IRUSR | S_IRGRP | S_IROTH, -- 2.20.1
[PATCH -next 0/2] sparc: use scnprintf() in show() methods
snprintf() returns the number of bytes that would be written, which may be greater than the actual length to be written. show() methods should return the number of bytes printed into the buffer. This is the return value of scnprintf(). Chen Zhou (2): sparc: use scnprintf() in show_pciobppath_attr() in pci.c sparc: use scnprintf() in show_pciobppath_attr() in vio.c arch/sparc/kernel/pci.c | 2 +- arch/sparc/kernel/vio.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) -- 2.20.1
[PATCH -next 3/3] s390/protvirt: use scnprintf() instead of snprintf()
snprintf() returns the number of bytes that would be written, which may be greater than the actual length to be written. uv_query_facilities() should return the number of bytes printed into the buffer. This is the return value of scnprintf(). The other functions are the same. Signed-off-by: Chen Zhou --- arch/s390/kernel/uv.c | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index 4c0677fc8904..e7ea82a2bf63 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -331,7 +331,7 @@ EXPORT_SYMBOL_GPL(arch_make_page_accessible); static ssize_t uv_query_facilities(struct kobject *kobj, struct kobj_attribute *attr, char *page) { - return snprintf(page, PAGE_SIZE, "%lx\n%lx\n%lx\n%lx\n", + return scnprintf(page, PAGE_SIZE, "%lx\n%lx\n%lx\n%lx\n", uv_info.inst_calls_list[0], uv_info.inst_calls_list[1], uv_info.inst_calls_list[2], @@ -344,7 +344,7 @@ static struct kobj_attribute uv_query_facilities_attr = static ssize_t uv_query_max_guest_cpus(struct kobject *kobj, struct kobj_attribute *attr, char *page) { - return snprintf(page, PAGE_SIZE, "%d\n", + return scnprintf(page, PAGE_SIZE, "%d\n", uv_info.max_guest_cpus); } @@ -354,7 +354,7 @@ static struct kobj_attribute uv_query_max_guest_cpus_attr = static ssize_t uv_query_max_guest_vms(struct kobject *kobj, struct kobj_attribute *attr, char *page) { - return snprintf(page, PAGE_SIZE, "%d\n", + return scnprintf(page, PAGE_SIZE, "%d\n", uv_info.max_num_sec_conf); } @@ -364,7 +364,7 @@ static struct kobj_attribute uv_query_max_guest_vms_attr = static ssize_t uv_query_max_guest_addr(struct kobject *kobj, struct kobj_attribute *attr, char *page) { - return snprintf(page, PAGE_SIZE, "%lx\n", + return scnprintf(page, PAGE_SIZE, "%lx\n", uv_info.max_sec_stor_addr); } -- 2.20.1
[PATCH -next 2/3] s390: use scnprintf() in sys_##_prefix##_##_name##_show
snprintf() returns the number of bytes that would be written, which may be greater than the actual length to be written. show() methods should return the number of bytes printed into the buffer. This is the return value of scnprintf(). Signed-off-by: Chen Zhou --- arch/s390/kernel/ipl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index ccea9a245867..90a2a17239b0 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -181,7 +181,7 @@ static ssize_t sys_##_prefix##_##_name##_show(struct kobject *kobj, \ struct kobj_attribute *attr,\ char *page) \ { \ - return snprintf(page, PAGE_SIZE, _format, ##args); \ + return scnprintf(page, PAGE_SIZE, _format, ##args); \ } #define IPL_ATTR_CCW_STORE_FN(_prefix, _name, _ipl_blk) \ -- 2.20.1
[PATCH -next 1/3] s390/crypto: use scnprintf() instead of snprintf()
snprintf() returns the number of bytes that would be written, which may be greater than the actual length to be written. show() methods should return the number of bytes printed into the buffer. This is the return value of scnprintf(). Signed-off-by: Chen Zhou --- arch/s390/crypto/prng.c | 14 +++--- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c index d977643fa627..e1ae23911ccd 100644 --- a/arch/s390/crypto/prng.c +++ b/arch/s390/crypto/prng.c @@ -693,7 +693,7 @@ static ssize_t prng_chunksize_show(struct device *dev, struct device_attribute *attr, char *buf) { - return snprintf(buf, PAGE_SIZE, "%u\n", prng_chunk_size); + return scnprintf(buf, PAGE_SIZE, "%u\n", prng_chunk_size); } static DEVICE_ATTR(chunksize, 0444, prng_chunksize_show, NULL); @@ -712,7 +712,7 @@ static ssize_t prng_counter_show(struct device *dev, counter = prng_data->prngws.byte_counter; mutex_unlock(_data->mutex); - return snprintf(buf, PAGE_SIZE, "%llu\n", counter); + return scnprintf(buf, PAGE_SIZE, "%llu\n", counter); } static DEVICE_ATTR(byte_counter, 0444, prng_counter_show, NULL); @@ -721,7 +721,7 @@ static ssize_t prng_errorflag_show(struct device *dev, struct device_attribute *attr, char *buf) { - return snprintf(buf, PAGE_SIZE, "%d\n", prng_errorflag); + return scnprintf(buf, PAGE_SIZE, "%d\n", prng_errorflag); } static DEVICE_ATTR(errorflag, 0444, prng_errorflag_show, NULL); @@ -731,9 +731,9 @@ static ssize_t prng_mode_show(struct device *dev, char *buf) { if (prng_mode == PRNG_MODE_TDES) - return snprintf(buf, PAGE_SIZE, "TDES\n"); + return scnprintf(buf, PAGE_SIZE, "TDES\n"); else - return snprintf(buf, PAGE_SIZE, "SHA512\n"); + return scnprintf(buf, PAGE_SIZE, "SHA512\n"); } static DEVICE_ATTR(mode, 0444, prng_mode_show, NULL); @@ -756,7 +756,7 @@ static ssize_t prng_reseed_limit_show(struct device *dev, struct device_attribute *attr, char *buf) { - return snprintf(buf, PAGE_SIZE, "%u\n", prng_reseed_limit); + 
return scnprintf(buf, PAGE_SIZE, "%u\n", prng_reseed_limit); } static ssize_t prng_reseed_limit_store(struct device *dev, struct device_attribute *attr, @@ -787,7 +787,7 @@ static ssize_t prng_strength_show(struct device *dev, struct device_attribute *attr, char *buf) { - return snprintf(buf, PAGE_SIZE, "256\n"); + return scnprintf(buf, PAGE_SIZE, "256\n"); } static DEVICE_ATTR(strength, 0444, prng_strength_show, NULL); -- 2.20.1
[PATCH -next 0/3] s390: use scnprintf() in show() methods
snprintf() returns the number of bytes that would be written, which may be greater than the actual length to be written. show() methods should return the number of bytes printed into the buffer. This is the return value of scnprintf(). Chen Zhou (3): s390/crypto: use scnprintf() instead of snprintf() s390: use scnprintf() in sys_##_prefix##_##_name##_show s390/protvirt: use scnprintf() instead of snprintf() arch/s390/crypto/prng.c | 14 +++--- arch/s390/kernel/ipl.c | 2 +- arch/s390/kernel/uv.c | 8 3 files changed, 12 insertions(+), 12 deletions(-) -- 2.20.1
[PATCH -next] arch/arm: use scnprintf() in l2x0_pmu_event_show()
snprintf() returns the number of bytes that would be written, which may be greater than the actual length to be written. show() methods should return the number of bytes printed into the buffer. This is the return value of scnprintf(). Signed-off-by: Chen Zhou --- arch/arm/mm/cache-l2x0-pmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mm/cache-l2x0-pmu.c b/arch/arm/mm/cache-l2x0-pmu.c index 993fefdc167a..d20626451a2e 100644 --- a/arch/arm/mm/cache-l2x0-pmu.c +++ b/arch/arm/mm/cache-l2x0-pmu.c @@ -343,7 +343,7 @@ static ssize_t l2x0_pmu_event_show(struct device *dev, struct l2x0_event_attribute *lattr; lattr = container_of(attr, typeof(*lattr), attr); - return snprintf(buf, PAGE_SIZE, "config=0x%x\n", lattr->config); + return scnprintf(buf, PAGE_SIZE, "config=0x%x\n", lattr->config); } static umode_t l2x0_pmu_event_attr_is_visible(struct kobject *kobj, -- 2.20.1
[PATCH -next] powerpc/powernv: add NULL check after kzalloc
Fixes coccicheck warning: ./arch/powerpc/platforms/powernv/opal.c:813:1-5: alloc with no test, possible model on line 814 Add NULL check after kzalloc. Signed-off-by: Chen Zhou --- arch/powerpc/platforms/powernv/opal.c | 4 1 file changed, 4 insertions(+) diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 2b3dfd0b6cdd..d95954ad4c0a 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -811,6 +811,10 @@ static int opal_add_one_export(struct kobject *parent, const char *export_name, goto out; attr = kzalloc(sizeof(*attr), GFP_KERNEL); + if (!attr) { + rc = -ENOMEM; + goto out; + } name = kstrdup(export_name, GFP_KERNEL); if (!name) { rc = -ENOMEM; -- 2.20.1
[PATCH -next] media: coda: jpeg: add NULL check after kmalloc
Fixes coccicheck warning: ./drivers/media/platform/coda/coda-jpeg.c:331:3-31: alloc with no test, possible model on line 354 Add NULL check after kmalloc. Signed-off-by: Chen Zhou --- drivers/media/platform/coda/coda-jpeg.c | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/media/platform/coda/coda-jpeg.c b/drivers/media/platform/coda/coda-jpeg.c index 00d19859db50..b11cfbe166dd 100644 --- a/drivers/media/platform/coda/coda-jpeg.c +++ b/drivers/media/platform/coda/coda-jpeg.c @@ -327,8 +327,11 @@ int coda_jpeg_decode_header(struct coda_ctx *ctx, struct vb2_buffer *vb) "only 8-bit quantization tables supported\n"); continue; } - if (!ctx->params.jpeg_qmat_tab[i]) + if (!ctx->params.jpeg_qmat_tab[i]) { ctx->params.jpeg_qmat_tab[i] = kmalloc(64, GFP_KERNEL); + if (!ctx->params.jpeg_qmat_tab[i]) + return -ENOMEM; + } memcpy(ctx->params.jpeg_qmat_tab[i], quantization_tables[i].start, 64); } -- 2.20.1
[PATCH -next] sunrpc: use kmemdup_nul() in gssp_stringify()
It is more efficient to use kmemdup_nul() if the size is known exactly. According to the doc: "Note: Use kmemdup_nul() instead if the size is known exactly." Signed-off-by: Chen Zhou --- net/sunrpc/auth_gss/gss_rpc_upcall.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c index 0349f455a862..af9c7f43859c 100644 --- a/net/sunrpc/auth_gss/gss_rpc_upcall.c +++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c @@ -223,7 +223,7 @@ static int gssp_alloc_receive_pages(struct gssx_arg_accept_sec_context *arg) static char *gssp_stringify(struct xdr_netobj *netobj) { - return kstrndup(netobj->data, netobj->len, GFP_KERNEL); + return kmemdup_nul(netobj->data, netobj->len, GFP_KERNEL); } static void gssp_hostbased_service(char **principal) -- 2.20.1
[PATCH -next] xfs: remove duplicate headers
Remove duplicate headers which are included twice. Signed-off-by: Chen Zhou --- fs/xfs/xfs_xattr.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c index fc5d7276026e..bca48b308c02 100644 --- a/fs/xfs/xfs_xattr.c +++ b/fs/xfs/xfs_xattr.c @@ -12,7 +12,6 @@ #include "xfs_inode.h" #include "xfs_attr.h" #include "xfs_acl.h" -#include "xfs_da_format.h" #include "xfs_da_btree.h" #include -- 2.20.1
[PATCH -next] NFS: remove duplicate headers
Remove duplicate headers which are included twice. Signed-off-by: Chen Zhou --- fs/nfs/dns_resolve.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c index 963800037609..e87d500ad95a 100644 --- a/fs/nfs/dns_resolve.c +++ b/fs/nfs/dns_resolve.c @@ -39,7 +39,6 @@ ssize_t nfs_dns_resolve_name(struct net *net, char *name, size_t namelen, #include #include #include -#include #include #include #include -- 2.20.1
[PATCH -next] PCI: endpoint: use kmemdup_nul() in pci_epf_create()
It is more efficient to use kmemdup_nul() if the size is known exactly. The doc in kernel: "Note: Use kmemdup_nul() instead if the size is known exactly." Signed-off-by: Chen Zhou --- drivers/pci/endpoint/pci-epf-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/endpoint/pci-epf-core.c b/drivers/pci/endpoint/pci-epf-core.c index 244e00f48c5c..f035d2ebcae5 100644 --- a/drivers/pci/endpoint/pci-epf-core.c +++ b/drivers/pci/endpoint/pci-epf-core.c @@ -252,7 +252,7 @@ struct pci_epf *pci_epf_create(const char *name) return ERR_PTR(-ENOMEM); len = strchrnul(name, '.') - name; - epf->name = kstrndup(name, len, GFP_KERNEL); + epf->name = kmemdup_nul(name, len, GFP_KERNEL); if (!epf->name) { kfree(epf); return ERR_PTR(-ENOMEM); -- 2.20.1
[PATCH -next] nvmet: replace kstrndup() with kmemdup_nul()
It is more efficient to use kmemdup_nul() if the size is known exactly. The doc in kernel: "Note: Use kmemdup_nul() instead if the size is known exactly." Signed-off-by: Chen Zhou --- drivers/nvme/target/configfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index 58cabd7b6fc5..9894668a40e7 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -327,7 +327,7 @@ static ssize_t nvmet_ns_device_path_store(struct config_item *item, kfree(ns->device_path); ret = -ENOMEM; - ns->device_path = kstrndup(page, len, GFP_KERNEL); + ns->device_path = kmemdup_nul(page, len, GFP_KERNEL); if (!ns->device_path) goto out_unlock; @@ -963,7 +963,7 @@ static ssize_t nvmet_subsys_attr_model_store(struct config_item *item, return -EINVAL; } - new_model_number = kstrndup(page, len, GFP_KERNEL); + new_model_number = kmemdup_nul(page, len, GFP_KERNEL); if (!new_model_number) return -ENOMEM; -- 2.20.1
[PATCH -next] iommu: remove set but not used variable 'data'
Fixes gcc '-Wunused-but-set-variable' warning: drivers/iommu/mtk_iommu_v1.c:467:25: warning: variable ‘data’ set but not used [-Wunused-but-set-variable] struct mtk_iommu_data *data; Reported-by: Hulk Robot Signed-off-by: Chen Zhou --- drivers/iommu/mtk_iommu_v1.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c index 7bdd74c7cb9f..36cc1d9667a2 100644 --- a/drivers/iommu/mtk_iommu_v1.c +++ b/drivers/iommu/mtk_iommu_v1.c @@ -464,12 +464,11 @@ static void mtk_iommu_probe_finalize(struct device *dev) static void mtk_iommu_release_device(struct device *dev) { struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); - struct mtk_iommu_data *data; if (!fwspec || fwspec->ops != _iommu_ops) return; - data = dev_iommu_priv_get(dev); + dev_iommu_priv_get(dev); iommu_fwspec_free(dev); } -- 2.20.1
[PATCH -next] iommu/arm-smmu-v3: remove set but not used variable 'smmu'
Fixes gcc '-Wunused-but-set-variable' warning: drivers/iommu/arm-smmu-v3.c:2989:26: warning: variable ‘smmu’ set but not used [-Wunused-but-set-variable] struct arm_smmu_device *smmu; Reported-by: Hulk Robot Signed-off-by: Chen Zhou --- drivers/iommu/arm-smmu-v3.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 42e1ee7e5197..89ee9c5d8b88 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -2986,13 +2986,11 @@ static void arm_smmu_release_device(struct device *dev) { struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); struct arm_smmu_master *master; - struct arm_smmu_device *smmu; if (!fwspec || fwspec->ops != _smmu_ops) return; master = dev_iommu_priv_get(dev); - smmu = master->smmu; arm_smmu_detach_dev(master); arm_smmu_disable_pasid(master); kfree(master); -- 2.20.1
[PATCH -next] brcmfmac: make non-global functions static
Fix sparse warning: drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c:2206:5: warning: symbol 'brcmf_p2p_get_conn_idx' was not declared. Should it be static? Reported-by: Hulk Robot Signed-off-by: Chen Zhou --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c index e32c24a2670d..2a2440031357 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c @@ -2203,7 +2203,7 @@ static struct wireless_dev *brcmf_p2p_create_p2pdev(struct brcmf_p2p_info *p2p, return ERR_PTR(err); } -int brcmf_p2p_get_conn_idx(struct brcmf_cfg80211_info *cfg) +static int brcmf_p2p_get_conn_idx(struct brcmf_cfg80211_info *cfg) { int i; struct brcmf_if *ifp = netdev_priv(cfg_to_ndev(cfg)); -- 2.20.1
[PATCH -next] drm/amd/display: remove duplicate headers
Remove duplicate headers which are included twice. Signed-off-by: Chen Zhou --- drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 9ef9e50a34fa..1db592372435 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -13,7 +13,6 @@ #include "core_status.h" #include "dpcd_defs.h" -#include "resource.h" #define DC_LOGGER \ link->ctx->logger -- 2.20.1
[PATCH -next] net: ipa: remove duplicate headers
Remove duplicate headers which are included twice. Signed-off-by: Chen Zhou --- drivers/net/ipa/ipa_mem.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ipa/ipa_mem.c b/drivers/net/ipa/ipa_mem.c index aa8f6b0f3d50..3ef814119aab 100644 --- a/drivers/net/ipa/ipa_mem.c +++ b/drivers/net/ipa/ipa_mem.c @@ -17,7 +17,6 @@ #include "ipa_data.h" #include "ipa_cmd.h" #include "ipa_mem.h" -#include "ipa_data.h" #include "ipa_table.h" #include "gsi_trans.h" -- 2.20.1
[PATCH -next] cxgb4: remove duplicate headers
Remove duplicate headers which are included twice. Signed-off-by: Chen Zhou --- drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index e46a14f44a6f..30d25a37fc3b 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -466,8 +466,6 @@ static inline struct mbox_cmd *mbox_cmd_log_entry(struct mbox_cmd_log *log, return &((struct mbox_cmd *)&(log)[1])[entry_idx]; } -#include "t4fw_api.h" - #define FW_VERSION(chip) ( \ FW_HDR_FW_VER_MAJOR_G(chip##FW_VERSION_MAJOR) | \ FW_HDR_FW_VER_MINOR_G(chip##FW_VERSION_MINOR) | \ -- 2.20.1
[PATCH -next] kernel.h: remove duplicate headers
Remove duplicate headers which are included twice. Signed-off-by: Chen Zhou --- include/linux/kernel.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 04a5885cec1b..12fe1afebfd9 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -17,7 +17,6 @@ #include #include #include -#include #define STACK_MAGIC0xdeadbeef -- 2.20.1
Re: [PATCH 2/4] arm64: kdump: support reserving crashkernel above 4G
Hi James, On 2019/6/6 0:29, James Morse wrote: > Hello, > > On 07/05/2019 04:50, Chen Zhou wrote: >> When crashkernel is reserved above 4G in memory, kernel should >> reserve some amount of low memory for swiotlb and some DMA buffers. > >> Meanwhile, support crashkernel=X,[high,low] in arm64. When use >> crashkernel=X parameter, try low memory first and fall back to high >> memory unless "crashkernel=X,high" is specified. > > What is the 'unless crashkernel=...,high' for? I think it would be simpler to > relax the > ARCH_LOW_ADDRESS_LIMIT if reserve_crashkernel_low() allocated something. > > This way "crashkernel=1G" tries to allocate 1G below 4G, but fails if there > isn't enough > memory. "crashkernel=1G crashkernel=16M,low" allocates 16M below 4G, which is > more likely > to succeed, if it does it can then place the 1G block anywhere. > Yeah, this is much simpler. > >> diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c >> index 413d566..82cd9a0 100644 >> --- a/arch/arm64/kernel/setup.c >> +++ b/arch/arm64/kernel/setup.c >> @@ -243,6 +243,9 @@ static void __init request_standard_resources(void) >> request_resource(res, _data); >> #ifdef CONFIG_KEXEC_CORE >> /* Userspace will find "Crash kernel" region in /proc/iomem. */ >> +if (crashk_low_res.end && crashk_low_res.start >= res->start && >> +crashk_low_res.end <= res->end) >> +request_resource(res, _low_res); >> if (crashk_res.end && crashk_res.start >= res->start && >> crashk_res.end <= res->end) >> request_resource(res, _res); > > With both crashk_low_res and crashk_res, we end up with two entries in > /proc/iomem called > "Crash kernel". Because its sorted by address, and kexec-tools stops > searching when it > find "Crash kernel", you are always going to get the kernel placed in the > lower portion. > > I suspect this isn't what you want, can we rename crashk_low_res for arm64 so > that > existing kexec-tools doesn't use it? 
> In my patchset, in addition to the kernel patches, i also modify the kexec-tools. arm64: support more than one crash kernel regions(http://lists.infradead.org/pipermail/kexec/2019-April/022792.html). In kexec-tools patch, we read all the "Crash kernel" entry and load crash kernel high. > >> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c >> index d2adffb..3fcd739 100644 >> --- a/arch/arm64/mm/init.c >> +++ b/arch/arm64/mm/init.c >> @@ -74,20 +74,37 @@ phys_addr_t arm64_dma_phys_limit __ro_after_init; >> static void __init reserve_crashkernel(void) >> { >> unsigned long long crash_base, crash_size; >> +bool high = false; >> int ret; >> >> ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), >> _size, _base); >> /* no crashkernel= or invalid value specified */ >> -if (ret || !crash_size) >> -return; >> +if (ret || !crash_size) { >> +/* crashkernel=X,high */ >> +ret = parse_crashkernel_high(boot_command_line, >> +memblock_phys_mem_size(), >> +_size, _base); >> +if (ret || !crash_size) >> +return; >> +high = true; >> +} >> >> crash_size = PAGE_ALIGN(crash_size); >> >> if (crash_base == 0) { >> -/* Current arm64 boot protocol requires 2MB alignment */ >> -crash_base = memblock_find_in_range(0, ARCH_LOW_ADDRESS_LIMIT, >> -crash_size, SZ_2M); >> +/* >> + * Try low memory first and fall back to high memory >> + * unless "crashkernel=size[KMG],high" is specified. >> + */ >> +if (!high) >> +crash_base = memblock_find_in_range(0, >> +ARCH_LOW_ADDRESS_LIMIT, >> +crash_size, CRASH_ALIGN); >> +if (!crash_base) >> +crash_base = memblock_find_in_range(0, >> +memblock_end_of_DRAM(), >> +crash_size, CRASH_ALIGN); >> if (crash_base == 0) { >> pr_warn("cannot allocate crashkernel (size
Re: [PATCH 0/4] support reserving crashkernel above 4G on arm64 kdump
On 2019/6/6 0:32, James Morse wrote: > Hi! > > On 07/05/2019 04:50, Chen Zhou wrote: >> We use crashkernel=X to reserve crashkernel below 4G, which will fail >> when there is no enough memory. Currently, crashkernel=Y@X can be used >> to reserve crashkernel above 4G, in this case, if swiotlb or DMA buffers >> are requierd, capture kernel will boot failure because of no low memory. > >> When crashkernel is reserved above 4G in memory, kernel should reserve >> some amount of low memory for swiotlb and some DMA buffers. So there may >> be two crash kernel regions, one is below 4G, the other is above 4G. > > This is a good argument for supporting the 'crashkernel=...,low' version. > What is the 'crashkernel=...,high' version for? > > Wouldn't it be simpler to relax the ARCH_LOW_ADDRESS_LIMIT if we see > 'crashkernel=...,low' > in the kernel cmdline? > > I don't see what the 'crashkernel=...,high' variant is giving us, it just > complicates the > flow of reserve_crashkernel(). > > If we called reserve_crashkernel_low() at the beginning of > reserve_crashkernel() we could > use crashk_low_res.end to change some limit variable from > ARCH_LOW_ADDRESS_LIMIT to > memblock_end_of_DRAM(). > I think this is a simpler change that gives you what you want. According to your suggestions, we should do the following: 1. call reserve_crashkernel_low() at the beginning of reserve_crashkernel() 2. mark the low region as 'nomap' 3. use crashk_low_res.end to change some limit variable from ARCH_LOW_ADDRESS_LIMIT to memblock_end_of_DRAM() 4. rename crashk_low_res as "Crash kernel (low)" for arm64 5. add a 'linux,low-memory-range' node in DT Do I understand correctly? > > >> Then >> Crash dump kernel reads more than one crash kernel regions via a dtb >> property under node /chosen, >> linux,usable-memory-range = <BASE1 SIZE1 [BASE2 SIZE2]>. > > Won't this break if your kdump kernel doesn't know what the extra parameters > are? > Or if it expects two ranges, but only gets one? 
These DT properties should be > treated as > ABI between kernel versions, we can't really change it like this. > > I think the 'low' region is an optional-extra, that is never mapped by the > first kernel. I > think the simplest thing to do is to add an 'linux,low-memory-range' that we > memblock_add() after memblock_cap_memory_range() has been called. > If its missing, or the new kernel doesn't know what its for, everything keeps > working. > > >> Besides, we need to modify kexec-tools: >> arm64: support more than one crash kernel regions(see [1]) > >> I post this patch series about one month ago. The previous changes and >> discussions can be retrived from: > > Ah, this wasn't obvious as you've stopped numbering the series. Please label > the next one > 'v6' so that we can describe this as 'v5'. (duplicate numbering would be even > more confusing!) > ok. > > Thanks, > > James > > . > Thanks, Chen Zhou
Re: [Question] panic when write file cpuset.cpus
+Cc linux-kernel@vger.kernel.org On 2019/6/6 11:04, Chen Zhou wrote: > Hi all, > > I hit the following issue in linux 4.4 which is hard to reproduce. > > [20190527221106][bsp_pci_device_get_bar]--- pBasePhyAddr :3a00800, > len:400 --- > [20190527221106]Unable to handle kernel paging request at virtual address > 10010 > [20190527221107]pgd = ffd3c696b000 > [20190527221107][10010] *pgd=, *pud= > [20190527221107]Internal error: Oops: 9605 [#1] PREEMPT SMP > [20190527221107]Modules linked in: linux_user_bde(O) linux_kernel_bde(O) > cmac(O) nse(O) pp(O) tm(O) lfe(O) tipc(O) pcie_aer_hisi(O) brd_drv_lpu(O) > hi161x_glf(O) hi161x_glc(O) chip_sdk_adpt(O) bonding(O) memenv(O) iof_sal(O) > iof_dlog(O) iof_devent(O) iof_iomm(O) drv_bsp_pic(O) bsp_common(O) > pramdisk(O) bsp_proc(O) kdc_uio_log(O) vrp_env_log_area(O) drv_bsp_fmea(O) > Drv_LastWds_K(O) Drv_CpuDfxInfo_K(O) Drv_CpuDfx_K(O) v8_dfx_cpu(O) > Drv_Dfx_K(O) Drv_CpuRegInject_K(O) Drv_ResetCause_K(O) Drv_Kbox_K(O) > env_core(O) iof_data(O) Drv_L2flush_K(O) arm64_cache_dfx(O) mmapdev(O) > drv_extern_int(O) irq_monitor(O) drv_bsp_avs(O) Drv_Pmbus_K(O) Drv_Smc_K(O) > bsp_sal(O) Drv_Ipsec_K(O) Drv_Tsensor_K(O) pci_hisi(O) serdes(O) > Drv_CheckBoot_K(O) Drv_Djtag_K(O) addr_win(O) iof_cbb(O) Drv_I2c_K(O) > hns_uio_enet(O) hns_enet_drv(O) hns_dsaf(O) hnae(O) hns_mdio(O) mdio(O) > Drv_FlowCtrl_K(O) Drv_Gpio_K(O) Drv_SysClk_K(O) physmap_of(O) map_rom(O) > cfi_cmdset_0002(O) cfi_probe(O) cfi_util(O) gen_probe(O) chipreg(O) rsm(O) > rtos_snapshot(O) rtos_kbox_panic(O) bsp_wdt(O) drv_bsp_ddr(O) bsp_reg(O) > Drv_Dts_K(O) Drv_SysCtl_K(O) arm_sal_issu(O) ksecurec(PO) ext4 jbd2 ext2 > mbcache ofpart i2c_dev i2c_core uio nand nand_ecc nand_ids cmdlinepart > mtdblock mtd_blkdevs mtd > [20190527221107]CPU: 2 PID: 2656 Comm: monitor Tainted: PW O > 4.4.171 #1 > [20190527221107]Hardware name: Hisilicon chip6_16 Product Board (DT) > [20190527221107]task: ffd3bf81c250 task.stack: ffd3bff0c000 > [20190527221107]PC is at rb_erase+0x14/0x320 > 
[20190527221107]LR is at erase_header+0x50/0x54 > [20190527221107]pc : [] lr : [] pstate: > 0145 > [20190527221107]sp : ffd3bff0f9a0 > [20190527221107]x29: ffd3bff0f9a0 x28: ff69fe8b1980 > [20190527221107]x27: 0001 x26: ff8008e7e340 > [20190527221107]x25: ff8008e05000 x24: ff8008e32e28 > [20190527221107]x23: ffd3c0542500 x22: ff8008e32000 > [20190527221107]x21: ff8008e05000 x20: ffd3c0542f00 > [20190527221107]x19: ffd3c0542fb8 x18: 000f > [20190527221107]x17: 007f9bd20e10 x16: ff8008367108 > [20190527221107]x15: 1fee x14: > [20190527221107]x13: x12: > [20190527221107]x11: 0001 x10: 0001 > [20190527221107]x9 : 0001 x8 : ff800894622d > [20190527221107]x7 : ffd3c65dd7d0 x6 : > [20190527221107]x5 : ffd3bf81a740 x4 : > [20190527221107]x3 : 00010001 x2 : 0001 > [20190527221107]x1 : ffd3c0542550 x0 : ffd3c0542f58 > [20190527221107]Process monitor (pid: 2656, stack limit = 0xffd3bff0c000) > [20190527221107] > [20190527221107][] rb_erase+0x14/0x320 > [20190527221107][] drop_sysctl_table+0x17c/0x1d4 > [20190527221107][] drop_sysctl_table+0x1a4/0x1d4 > [20190527221107][] unregister_sysctl_table+0x9c/0xa8 > [20190527221107][] unregister_sysctl_table+0x60/0xa8 > [20190527221107][] partition_sched_domains+0x64/0x338 > [20190527221107][] rebuild_sched_domains_locked+0xe0/0x3c0 > [20190527221107][] cpuset_write_resmask+0x288/0x8cc > [20190527221107][] cgroup_file_write+0x64/0x128 > [20190527221107][] kernfs_fop_write+0x15c/0x1ac > [20190527221107][] __vfs_write+0x60/0x124 > [20190527221107][] vfs_write+0xb0/0x184 > [20190527221107][] SyS_write+0x6c/0xcc > [20190527221107][] __sys_trace_return+0x0/0x4 > > > The disassembler and the source code about the backtrace are as below: > > rb_erase()->__rb_erase_augmented()->__rb_change_child() > __rb_erase_augmented(): > ff800846e514: a9409006ldp x6, x4, [x0, #8] > ff800846e518: b5000244cbnzx4, ff800846e560 > > ff800846e51c: f943ldr x3, [x0] > __rb_change_child(): > ff800846e520: f27ef462andsx2, x3, #0xfffc > ff800846e524: 
54000140b.eqff800846e54c > // b.none > ff800846e528: f9400844ldr x4, [x2, #16] > ff800846e52c: eb04001fcmp x0, x4 > > ff800846e530: 54a1b.neff800846e544 &
[PATCH v5 0/4] support reserving crashkernel above 4G on arm64 kdump
When crashkernel is reserved above 4G in memory, the kernel should reserve some amount of low memory for swiotlb and some DMA buffers. So there may be two crash kernel regions: one below 4G, the other above 4G. The crash dump kernel reads more than one crash kernel region via a dtb property under node /chosen, linux,usable-memory-range = <BASE1 SIZE1 [BASE2 SIZE2]>. Besides, we need to modify kexec-tools: arm64: support more than one crash kernel regions (see [1]) Changes since [v4]: - Reimplement memblock_cap_memory_ranges for multiple ranges. Changes since [v3]: - Add memblock_cap_memory_ranges for multiple ranges. - Split patch "arm64: kdump: support more than one crash kernel regions" in two. One is the above "Add memblock_cap_memory_ranges"; the other uses memblock_cap_memory_ranges to support multiple crash kernel regions. - Fix some compile warnings. Changes since [v2]: - Split patch "arm64: kdump: support reserving crashkernel above 4G" in two. Put "move reserve_crashkernel_low() into kexec_core.c" in a separate patch. Changes since [v1]: - Move common reserve_crashkernel_low() code into kernel/kexec_core.c. - Remove memblock_cap_memory_ranges() I added in v1 and implement that in fdt_enforce_memory_region(). There are at most two crash kernel regions; in the two-region case, we cap the memory range [min(regs[*].start), max(regs[*].end)] and then remove the memory range in the middle. 
[1]: http://lists.infradead.org/pipermail/kexec/2019-April/022792.html [v1]: https://lkml.org/lkml/2019/4/8/628 [v2]: https://lkml.org/lkml/2019/4/9/86 [v3]: https://lkml.org/lkml/2019/4/15/6 [v4]: https://lkml.org/lkml/2019/4/15/273 Chen Zhou (4): x86: kdump: move reserve_crashkernel_low() into kexec_core.c arm64: kdump: support reserving crashkernel above 4G memblock: extend memblock_cap_memory_range to multiple ranges kdump: update Documentation about crashkernel on arm64 Documentation/admin-guide/kernel-parameters.txt | 4 +- arch/arm64/include/asm/kexec.h | 3 ++ arch/arm64/kernel/setup.c | 3 ++ arch/arm64/mm/init.c| 59 -- arch/x86/include/asm/kexec.h| 3 ++ arch/x86/kernel/setup.c | 66 +++-- include/linux/kexec.h | 5 ++ include/linux/memblock.h| 2 +- kernel/kexec_core.c | 56 + mm/memblock.c | 44 - 10 files changed, 144 insertions(+), 101 deletions(-) -- 2.7.4
Re: [PATCH v3 3/4] arm64: kdump: support more than one crash kernel regions
Hi Mike, On 2019/4/14 20:13, Mike Rapoport wrote: > Hi, > > On Tue, Apr 09, 2019 at 06:28:18PM +0800, Chen Zhou wrote: >> After commit (arm64: kdump: support reserving crashkernel above 4G), >> there may be two crash kernel regions, one is below 4G, the other is >> above 4G. >> >> Crash dump kernel reads more than one crash kernel regions via a dtb >> property under node /chosen, >> linux,usable-memory-range = > > Somehow I've missed that previously, but how is this supposed to work on > EFI systems? Whatever the way in which the systems work, there is FDT pointer(__fdt_pointer) in arm64 kernel and file /sys/firmware/fdt will be created in late_initcall. Kexec-tools read and update file /sys/firmware/fdt in EFI systems to support kdump to boot capture kernel. For supporting more than one crash kernel regions, kexec-tools make changes accordingly. Details are in below: http://lists.infradead.org/pipermail/kexec/2019-April/022792.html Thanks, Chen Zhou > >> Signed-off-by: Chen Zhou >> --- >> arch/arm64/mm/init.c | 66 >> >> include/linux/memblock.h | 6 + >> mm/memblock.c| 7 ++--- >> 3 files changed, 66 insertions(+), 13 deletions(-) >> >> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c >> index 3bebddf..0f18665 100644 >> --- a/arch/arm64/mm/init.c >> +++ b/arch/arm64/mm/init.c >> @@ -65,6 +65,11 @@ phys_addr_t arm64_dma_phys_limit __ro_after_init; >> >> #ifdef CONFIG_KEXEC_CORE >> >> +/* at most two crash kernel regions, low_region and high_region */ >> +#define CRASH_MAX_USABLE_RANGES 2 >> +#define LOW_REGION_IDX 0 >> +#define HIGH_REGION_IDX 1 >> + >> /* >> * reserve_crashkernel() - reserves memory for crash kernel >> * >> @@ -297,8 +302,8 @@ static int __init early_init_dt_scan_usablemem(unsigned >> long node, >> const char *uname, int depth, void *data) >> { >> struct memblock_region *usablemem = data; >> -const __be32 *reg; >> -int len; >> +const __be32 *reg, *endp; >> +int len, nr = 0; >> >> if (depth != 1 || strcmp(uname, "chosen") != 0) >> return 0; 
>> @@ -307,22 +312,63 @@ static int __init >> early_init_dt_scan_usablemem(unsigned long node, >> if (!reg || (len < (dt_root_addr_cells + dt_root_size_cells))) >> return 1; >> >> -usablemem->base = dt_mem_next_cell(dt_root_addr_cells, ); >> -usablemem->size = dt_mem_next_cell(dt_root_size_cells, ); >> +endp = reg + (len / sizeof(__be32)); >> +while ((endp - reg) >= (dt_root_addr_cells + dt_root_size_cells)) { >> +usablemem[nr].base = dt_mem_next_cell(dt_root_addr_cells, ); >> +usablemem[nr].size = dt_mem_next_cell(dt_root_size_cells, ); >> + >> +if (++nr >= CRASH_MAX_USABLE_RANGES) >> +break; >> +} >> >> return 1; >> } >> >> static void __init fdt_enforce_memory_region(void) >> { >> -struct memblock_region reg = { >> -.size = 0, >> -}; >> +int i, cnt = 0; >> +struct memblock_region regs[CRASH_MAX_USABLE_RANGES]; >> + >> +memset(regs, 0, sizeof(regs)); >> +of_scan_flat_dt(early_init_dt_scan_usablemem, regs); >> + >> +for (i = 0; i < CRASH_MAX_USABLE_RANGES; i++) >> +if (regs[i].size) >> +cnt++; >> +else >> +break; >> + >> +if (cnt - 1 == LOW_REGION_IDX) >> +memblock_cap_memory_range(regs[LOW_REGION_IDX].base, >> +regs[LOW_REGION_IDX].size); >> +else if (cnt - 1 == HIGH_REGION_IDX) { >> +/* >> + * Two crash kernel regions, cap the memory range >> + * [regs[LOW_REGION_IDX].base, regs[HIGH_REGION_IDX].end] >> + * and then remove the memory range in the middle. >> + */ >> +int start_rgn, end_rgn, i, ret; >> +phys_addr_t mid_base, mid_size; >> + >> +mid_base = regs[LOW_REGION_IDX].base + >> regs[LOW_REGION_IDX].size; >> +mid_size = regs[HIGH_REGION_IDX].base - mid_base; >> +ret = memblock_isolate_range(, mid_base, >> +mid_size, _rgn, _rgn); >> >> -of_scan_flat_dt(early_init_dt_scan_usablemem, ); &g