[PATCH v6 03/10] kexec_file: factor out arch_kexec_kernel_*() from x86, powerpc
arch_kexec_kernel_*() and arch_kimage_file_post_load_cleanup can now be duplicated among some architectures, so let's factor them out. Signed-off-by: AKASHI Takahiro Cc: Dave Young Cc: Vivek Goyal Cc: Baoquan He Cc: Michael Ellerman Cc: Thiago Jung Bauermann --- arch/powerpc/include/asm/kexec.h| 2 +- arch/powerpc/kernel/kexec_elf_64.c | 2 +- arch/powerpc/kernel/machine_kexec_file_64.c | 39 ++-- arch/x86/include/asm/kexec-bzimage64.h | 2 +- arch/x86/kernel/kexec-bzimage64.c | 2 +- arch/x86/kernel/machine_kexec_64.c | 45 +-- include/linux/kexec.h | 15 kernel/kexec_file.c | 57 +++-- 8 files changed, 70 insertions(+), 94 deletions(-) diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h index 25668bc8cb2a..23588952d8bd 100644 --- a/arch/powerpc/include/asm/kexec.h +++ b/arch/powerpc/include/asm/kexec.h @@ -92,7 +92,7 @@ static inline bool kdump_in_progress(void) } #ifdef CONFIG_KEXEC_FILE -extern struct kexec_file_ops kexec_elf64_ops; +extern const struct kexec_file_ops kexec_elf64_ops; #ifdef CONFIG_IMA_KEXEC #define ARCH_HAS_KIMAGE_ARCH diff --git a/arch/powerpc/kernel/kexec_elf_64.c b/arch/powerpc/kernel/kexec_elf_64.c index 9a42309b091a..6c78c11c7faf 100644 --- a/arch/powerpc/kernel/kexec_elf_64.c +++ b/arch/powerpc/kernel/kexec_elf_64.c @@ -657,7 +657,7 @@ static void *elf64_load(struct kimage *image, char *kernel_buf, return ret ? 
ERR_PTR(ret) : fdt; } -struct kexec_file_ops kexec_elf64_ops = { +const struct kexec_file_ops kexec_elf64_ops = { .probe = elf64_probe, .load = elf64_load, }; diff --git a/arch/powerpc/kernel/machine_kexec_file_64.c b/arch/powerpc/kernel/machine_kexec_file_64.c index 992c0d258e5d..cd2a7671f5c6 100644 --- a/arch/powerpc/kernel/machine_kexec_file_64.c +++ b/arch/powerpc/kernel/machine_kexec_file_64.c @@ -31,52 +31,19 @@ #define SLAVE_CODE_SIZE256 -static struct kexec_file_ops *kexec_file_loaders[] = { +const struct kexec_file_ops * const kexec_file_loaders[] = { &kexec_elf64_ops, + NULL }; int arch_kexec_kernel_image_probe(struct kimage *image, void *buf, unsigned long buf_len) { - int i, ret = -ENOEXEC; - struct kexec_file_ops *fops; - /* We don't support crash kernels yet. */ if (image->type == KEXEC_TYPE_CRASH) return -ENOTSUPP; - for (i = 0; i < ARRAY_SIZE(kexec_file_loaders); i++) { - fops = kexec_file_loaders[i]; - if (!fops || !fops->probe) - continue; - - ret = fops->probe(buf, buf_len); - if (!ret) { - image->fops = fops; - return ret; - } - } - - return ret; -} - -void *arch_kexec_kernel_image_load(struct kimage *image) -{ - if (!image->fops || !image->fops->load) - return ERR_PTR(-ENOEXEC); - - return image->fops->load(image, image->kernel_buf, -image->kernel_buf_len, image->initrd_buf, -image->initrd_buf_len, image->cmdline_buf, -image->cmdline_buf_len); -} - -int arch_kimage_file_post_load_cleanup(struct kimage *image) -{ - if (!image->fops || !image->fops->cleanup) - return 0; - - return image->fops->cleanup(image->image_loader_data); + return _kexec_kernel_image_probe(image, buf, buf_len); } /** diff --git a/arch/x86/include/asm/kexec-bzimage64.h b/arch/x86/include/asm/kexec-bzimage64.h index d1b5d194e31d..284fd23d133b 100644 --- a/arch/x86/include/asm/kexec-bzimage64.h +++ b/arch/x86/include/asm/kexec-bzimage64.h @@ -1,6 +1,6 @@ #ifndef _ASM_KEXEC_BZIMAGE64_H #define _ASM_KEXEC_BZIMAGE64_H -extern struct kexec_file_ops kexec_bzImage64_ops; +extern 
const struct kexec_file_ops kexec_bzImage64_ops; #endif /* _ASM_KEXE_BZIMAGE64_H */ diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c index fb095ba0c02f..705654776c0c 100644 --- a/arch/x86/kernel/kexec-bzimage64.c +++ b/arch/x86/kernel/kexec-bzimage64.c @@ -538,7 +538,7 @@ static int bzImage64_verify_sig(const char *kernel, unsigned long kernel_len) } #endif -struct kexec_file_ops kexec_bzImage64_ops = { +const struct kexec_file_ops kexec_bzImage64_ops = { .probe = bzImage64_probe, .load = bzImage64_load, .cleanup = bzImage64_cleanup, diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 1f790cf9d38f..2cdd29d64181 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -30,8 +30,9 @@ #include #ifdef CONFIG_KEXEC_FILE -static struct kexec_file_ops *kexec_file_loaders[] = { +const struct kexec_file_ops * cons
[PATCH v6 10/10] arm64: kexec_file: add Image format support
The "Image" binary will be loaded at the offset of TEXT_OFFSET from the start of system memory. TEXT_OFFSET is determined from the header of the image. Regarding kernel signature verification, it will be done through verify_pefile_signature() as arm64's "Image" binary can be seen as in PE format. This approach is consistent with x86 implementation. We can sign an image with the sbsign command. Signed-off-by: AKASHI Takahiro Cc: Catalin Marinas Cc: Will Deacon --- arch/arm64/Kconfig | 7 +++ arch/arm64/include/asm/kexec.h | 66 + arch/arm64/kernel/Makefile | 1 + arch/arm64/kernel/kexec_image.c| 105 + arch/arm64/kernel/machine_kexec_file.c | 3 + 5 files changed, 182 insertions(+) create mode 100644 arch/arm64/kernel/kexec_image.c diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index e37be8a59a88..a9ef277faa3e 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -770,10 +770,17 @@ config KEXEC_FILE In addition to this option, you need to enable a specific type of image support. +config KEXEC_FILE_IMAGE_FMT + bool "Enable Image support" + depends on KEXEC_FILE + ---help--- + Select this option to enable 'Image' kernel loading. + config KEXEC_VERIFY_SIG bool "Verify kernel signature during kexec_file_load() syscall" depends on KEXEC_FILE select SYSTEM_DATA_VERIFICATION + select SIGNED_PE_FILE_VERIFICATION if KEXEC_FILE_IMAGE_FMT ---help--- Select this option to verify a signature with loaded kernel image. 
If configured, any attempt of loading a image without diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h index edb702e64a8a..2a63bf5f32ea 100644 --- a/arch/arm64/include/asm/kexec.h +++ b/arch/arm64/include/asm/kexec.h @@ -104,6 +104,72 @@ struct kimage_arch { unsigned long elf_load_addr; }; +/** + * struct arm64_image_header - arm64 kernel image header + * + * @pe_sig: Optional PE format 'MZ' signature + * @branch_code: Instruction to branch to stext + * @text_offset: Image load offset, little endian + * @image_size: Effective image size, little endian + * @flags: + * Bit 0: Kernel endianness. 0=little endian, 1=big endian + * @reserved: Reserved + * @magic: Magic number, "ARM\x64" + * @pe_header: Optional offset to a PE format header + **/ + +struct arm64_image_header { + u8 pe_sig[2]; + u8 pad[2]; + u32 branch_code; + u64 text_offset; + u64 image_size; + u64 flags; + u64 reserved[3]; + u8 magic[4]; + u32 pe_header; +}; + +static const u8 arm64_image_magic[4] = {'A', 'R', 'M', 0x64U}; +static const u8 arm64_image_pe_sig[2] = {'M', 'Z'}; + +/** + * arm64_header_check_magic - Helper to check the arm64 image header. + * + * Returns non-zero if header is OK. + */ + +static inline int arm64_header_check_magic(const struct arm64_image_header *h) +{ + if (!h) + return 0; + + if (!h->text_offset) + return 0; + + return (h->magic[0] == arm64_image_magic[0] + && h->magic[1] == arm64_image_magic[1] + && h->magic[2] == arm64_image_magic[2] + && h->magic[3] == arm64_image_magic[3]); +} + +/** + * arm64_header_check_pe_sig - Helper to check the arm64 image header. + * + * Returns non-zero if 'MZ' signature is found. 
+ */ + +static inline int arm64_header_check_pe_sig(const struct arm64_image_header *h) +{ + if (!h) + return 0; + + return (h->pe_sig[0] == arm64_image_pe_sig[0] + && h->pe_sig[1] == arm64_image_pe_sig[1]); +} + +extern const struct kexec_file_ops kexec_image_ops; + struct kimage; #define arch_kimage_file_post_load_cleanup arch_kimage_file_post_load_cleanup diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 5df003d6157c..a1161bab6810 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -51,6 +51,7 @@ arm64-obj-$(CONFIG_HIBERNATION) += hibernate.o hibernate-asm.o arm64-obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o \ cpu-reset.o arm64-obj-$(CONFIG_KEXEC_FILE) += machine_kexec_file.o +arm64-obj-$(CONFIG_KEXEC_FILE_IMAGE_FMT) += kexec_image.o arm64-obj-$(CONFIG_ARM64_RELOC_TEST) += arm64-reloc-test.o arm64-reloc-test-y := reloc_test_core.o reloc_test_syms.o arm64-obj-$(CONFIG_CRASH_DUMP) += crash_dump.o diff --git a/arch/arm64/kernel/kexec_image.c b/arch/arm64/kernel/kexec_image.c new file mode 100644 index ..b840b6ed6ed9 --- /dev/null +++ b/arch/arm64/kernel/kexec_image.c @@ -0,0 +1,105 @@ +/* + * Kexec image loader + + * Copyright (C) 2017 Linaro Limited + * Author: AKASHI Takahiro + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the
[PATCH v6 02/10] resource: add walk_system_ram_res_rev()
This function, being a variant of walk_system_ram_res() introduced in commit 8c86e70acead ("resource: provide new functions to walk through resources"), walks through a list of all the resources of System RAM in reversed order, i.e., from higher to lower. It will be used in kexec_file implementation on arm64. Signed-off-by: AKASHI Takahiro Cc: Vivek Goyal Cc: Andrew Morton Cc: Linus Torvalds --- include/linux/ioport.h | 3 +++ kernel/resource.c | 57 ++ 2 files changed, 60 insertions(+) diff --git a/include/linux/ioport.h b/include/linux/ioport.h index f5cf32e80041..62eb62b98118 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -273,6 +273,9 @@ extern int walk_system_ram_res(u64 start, u64 end, void *arg, int (*func)(u64, u64, void *)); extern int +walk_system_ram_res_rev(u64 start, u64 end, void *arg, + int (*func)(u64, u64, void *)); +extern int walk_iomem_res_desc(unsigned long desc, unsigned long flags, u64 start, u64 end, void *arg, int (*func)(u64, u64, void *)); diff --git a/kernel/resource.c b/kernel/resource.c index 9b5f04404152..69c31454cb2e 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -23,6 +23,8 @@ #include #include #include +#include +#include #include @@ -469,6 +471,61 @@ int walk_system_ram_res(u64 start, u64 end, void *arg, return ret; } +int walk_system_ram_res_rev(u64 start, u64 end, void *arg, + int (*func)(u64, u64, void *)) +{ + struct resource res, *rams; + int rams_size = 16, i; + int ret = -1; + + /* create a list */ + rams = vmalloc(sizeof(struct resource) * rams_size); + if (!rams) + return ret; + + res.start = start; + res.end = end; + res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; + i = 0; + while ((res.start < res.end) && + (!find_next_iomem_res(&res, IORES_DESC_NONE, true))) { + if (i >= rams_size) { + /* re-alloc */ + struct resource *rams_new; + int rams_new_size; + + rams_new_size = rams_size + 16; + rams_new = vmalloc(sizeof(struct resource) + * rams_new_size); + if (!rams_new) + goto out; + + 
memcpy(rams_new, rams, + sizeof(struct resource) * rams_size); + vfree(rams); + rams = rams_new; + rams_size = rams_new_size; + } + + rams[i].start = res.start; + rams[i++].end = res.end; + + res.start = res.end + 1; + res.end = end; + } + + /* go reverse */ + for (i--; i >= 0; i--) { + ret = (*func)(rams[i].start, rams[i].end, arg); + if (ret) + break; + } + +out: + vfree(rams); + return ret; +} + #if !defined(CONFIG_ARCH_HAS_WALK_MEMORY) /* -- 2.14.1 ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
[PATCH v6 09/10] arm64: enable KEXEC_FILE config
Modify arm64/Kconfig and Makefile to enable kexec_file_load support. Signed-off-by: AKASHI Takahiro Cc: Catalin Marinas Cc: Will Deacon --- arch/arm64/Kconfig | 22 ++ 1 file changed, 22 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 0df64a6a56d4..e37be8a59a88 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -757,6 +757,28 @@ config KEXEC but it is independent of the system firmware. And like a reboot you can start any kernel with it, not just Linux. +config KEXEC_FILE + bool "kexec file based system call" + select KEXEC_CORE + select BUILD_BIN2C + ---help--- + This is new version of kexec system call. This system call is + file based and takes file descriptors as system call argument + for kernel and initramfs as opposed to list of segments as + accepted by previous system call. + + In addition to this option, you need to enable a specific type + of image support. + +config KEXEC_VERIFY_SIG + bool "Verify kernel signature during kexec_file_load() syscall" + depends on KEXEC_FILE + select SYSTEM_DATA_VERIFICATION + ---help--- + Select this option to verify a signature with loaded kernel + image. If configured, any attempt of loading a image without + valid signature will fail. + config CRASH_DUMP bool "Build kdump crash kernel" help -- 2.14.1 ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
[PATCH v6 07/10] arm64: kexec_file: load initrd, device-tree and purgatory segments
load_other_segments() sets up and adds all the memory segments necessary other than the kernel, including initrd, device-tree blob and purgatory. Most of the code was borrowed from kexec-tools' counterpart. arch_kimage_file_post_load_cleanup() is meant to free arm64-specific data allocated for loading kernel. Signed-off-by: AKASHI Takahiro Cc: Catalin Marinas Cc: Will Deacon --- arch/arm64/include/asm/kexec.h | 22 arch/arm64/kernel/Makefile | 3 +- arch/arm64/kernel/machine_kexec_file.c | 213 + 3 files changed, 237 insertions(+), 1 deletion(-) create mode 100644 arch/arm64/kernel/machine_kexec_file.c diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h index e17f0529a882..2fadd3cbf3af 100644 --- a/arch/arm64/include/asm/kexec.h +++ b/arch/arm64/include/asm/kexec.h @@ -93,6 +93,28 @@ static inline void crash_prepare_suspend(void) {} static inline void crash_post_resume(void) {} #endif +#ifdef CONFIG_KEXEC_FILE +#define ARCH_HAS_KIMAGE_ARCH + +struct kimage_arch { + void *dtb_buf; +}; + +struct kimage; + +#define arch_kimage_file_post_load_cleanup arch_kimage_file_post_load_cleanup +extern int arch_kimage_file_post_load_cleanup(struct kimage *image); + +extern int setup_dtb(struct kimage *image, + unsigned long initrd_load_addr, unsigned long initrd_len, + char *cmdline, unsigned long cmdline_len, + char **dtb_buf, size_t *dtb_buf_len); +extern int load_other_segments(struct kimage *image, + unsigned long kernel_load_addr, + char *initrd, unsigned long initrd_len, + char *cmdline, unsigned long cmdline_len); +#endif + #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index f2b4e816b6de..5df003d6157c 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -48,8 +48,9 @@ arm64-obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL) += acpi_parking_protocol.o arm64-obj-$(CONFIG_PARAVIRT) += paravirt.o arm64-obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o arm64-obj-$(CONFIG_HIBERNATION)+= hibernate.o 
hibernate-asm.o -arm64-obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o \ +arm64-obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o \ cpu-reset.o +arm64-obj-$(CONFIG_KEXEC_FILE) += machine_kexec_file.o arm64-obj-$(CONFIG_ARM64_RELOC_TEST) += arm64-reloc-test.o arm64-reloc-test-y := reloc_test_core.o reloc_test_syms.o arm64-obj-$(CONFIG_CRASH_DUMP) += crash_dump.o diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c new file mode 100644 index ..81af1602e149 --- /dev/null +++ b/arch/arm64/kernel/machine_kexec_file.c @@ -0,0 +1,213 @@ +/* + * kexec_file for arm64 + * + * Copyright (C) 2017 Linaro Limited + * Author: AKASHI Takahiro + * + * Most code is derived from arm64 port of kexec-tools + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#define pr_fmt(fmt) "kexec_file: " fmt + +#include +#include +#include +#include +#include +#include + +static int __dt_root_addr_cells; +static int __dt_root_size_cells; + +const struct kexec_file_ops * const kexec_file_loaders[] = { + NULL +}; + +int arch_kimage_file_post_load_cleanup(struct kimage *image) +{ + vfree(image->arch.dtb_buf); + image->arch.dtb_buf = NULL; + + return _kimage_file_post_load_cleanup(image); +} + +int arch_kexec_walk_mem(struct kexec_buf *kbuf, int (*func)(u64, u64, void *)) +{ + if (kbuf->image->type == KEXEC_TYPE_CRASH) + return walk_iomem_res_desc(crashk_res.desc, + IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY, + crashk_res.start, crashk_res.end, + kbuf, func); + else if (kbuf->top_down) + return walk_system_ram_res_rev(0, ULONG_MAX, kbuf, func); + else + return walk_system_ram_res(0, ULONG_MAX, kbuf, func); +} + +int setup_dtb(struct kimage *image, + unsigned long initrd_load_addr, unsigned long initrd_len, + char *cmdline, unsigned long cmdline_len, + char **dtb_buf, size_t *dtb_buf_len) +{ + char 
*buf = NULL; + size_t buf_size; + int nodeoffset; + u64 value; + int range_len; + int ret; + + /* duplicate dt blob */ + buf_size = fdt_totalsize(initial_boot_params); + range_len = (__dt_root_addr_cells + __dt_root_size_cells) * sizeof(u32); + + if (initrd_load_addr) + buf_size += fdt_prop_len("initrd-start", sizeof(u64)) + + fdt_pr
[PATCH v6 01/10] include: pe.h: remove message[] from mz header definition
message[] field won't be part of the definition of mz header. This change is crucial for enabling kexec_file_load on arm64 because arm64's "Image" binary, as in PE format, doesn't have any data for it and accordingly the following check in pefile_parse_binary() will fail: chkaddr(cursor, mz->peaddr, sizeof(*pe)); Signed-off-by: AKASHI Takahiro Reviewed-by: Ard Biesheuvel Cc: David Howells Cc: Vivek Goyal Cc: Herbert Xu Cc: David S. Miller --- include/linux/pe.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/pe.h b/include/linux/pe.h index 143ce75be5f0..3482b18a48b5 100644 --- a/include/linux/pe.h +++ b/include/linux/pe.h @@ -166,7 +166,7 @@ struct mz_hdr { uint16_t oem_info; /* oem specific */ uint16_t reserved1[10]; /* reserved */ uint32_t peaddr;/* address of pe header */ - char message[64]; /* message to print */ + char message[]; /* message to print */ }; struct mz_reloc { -- 2.14.1 ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
[PATCH v6 08/10] arm64: kexec_file: set up for crash dump adding elf core header
load_crashdump_segments() creates and loads a memory segment of elf core header for crash dump. "linux,usable-memory-range" and "linux,elfcorehdr" will be added to the 2nd kernel's device-tree blob. The logic of this code is also from kexec-tools. Signed-off-by: AKASHI Takahiro Cc: Catalin Marinas Cc: Will Deacon --- arch/arm64/include/asm/kexec.h | 5 ++ arch/arm64/kernel/machine_kexec_file.c | 151 + kernel/kexec_file.c| 2 +- 3 files changed, 157 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h index 2fadd3cbf3af..edb702e64a8a 100644 --- a/arch/arm64/include/asm/kexec.h +++ b/arch/arm64/include/asm/kexec.h @@ -98,6 +98,10 @@ static inline void crash_post_resume(void) {} struct kimage_arch { void *dtb_buf; + /* Core ELF header buffer */ + void *elf_headers; + unsigned long elf_headers_sz; + unsigned long elf_load_addr; }; struct kimage; @@ -113,6 +117,7 @@ extern int load_other_segments(struct kimage *image, unsigned long kernel_load_addr, char *initrd, unsigned long initrd_len, char *cmdline, unsigned long cmdline_len); +extern int load_crashdump_segments(struct kimage *image); #endif #endif /* __ASSEMBLY__ */ diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c index 81af1602e149..cdedd4b05cac 100644 --- a/arch/arm64/kernel/machine_kexec_file.c +++ b/arch/arm64/kernel/machine_kexec_file.c @@ -19,6 +19,7 @@ #include #include #include +#include static int __dt_root_addr_cells; static int __dt_root_size_cells; @@ -32,6 +33,10 @@ int arch_kimage_file_post_load_cleanup(struct kimage *image) vfree(image->arch.dtb_buf); image->arch.dtb_buf = NULL; + vfree(image->arch.elf_headers); + image->arch.elf_headers = NULL; + image->arch.elf_headers_sz = 0; + return _kimage_file_post_load_cleanup(image); } @@ -48,6 +53,78 @@ int arch_kexec_walk_mem(struct kexec_buf *kbuf, int (*func)(u64, u64, void *)) return walk_system_ram_res(0, ULONG_MAX, kbuf, func); } +static int __init 
arch_kexec_file_init(void) +{ + /* Those values are used later on loading the kernel */ + __dt_root_addr_cells = dt_root_addr_cells; + __dt_root_size_cells = dt_root_size_cells; + + return 0; +} +late_initcall(arch_kexec_file_init); + +#define FDT_ALIGN(x, a)(((x) + (a) - 1) & ~((a) - 1)) +#define FDT_TAGALIGN(x)(FDT_ALIGN((x), FDT_TAGSIZE)) + +static int fdt_prop_len(const char *prop_name, int len) +{ + return (strlen(prop_name) + 1) + + sizeof(struct fdt_property) + + FDT_TAGALIGN(len); +} + +static bool cells_size_fitted(unsigned long base, unsigned long size) +{ + /* if *_cells >= 2, cells can hold 64-bit values anyway */ + if ((__dt_root_addr_cells == 1) && (base >= (1ULL << 32))) + return false; + + if ((__dt_root_size_cells == 1) && (size >= (1ULL << 32))) + return false; + + return true; +} + +static void fill_property(void *buf, u64 val64, int cells) +{ + u32 val32; + + if (cells == 1) { + val32 = cpu_to_fdt32((u32)val64); + memcpy(buf, &val32, sizeof(val32)); + } else { + memset(buf, 0, cells * sizeof(u32) - sizeof(u64)); + buf += cells * sizeof(u32) - sizeof(u64); + + val64 = cpu_to_fdt64(val64); + memcpy(buf, &val64, sizeof(val64)); + } +} + +static int fdt_setprop_range(void *fdt, int nodeoffset, const char *name, + unsigned long addr, unsigned long size) +{ + void *buf, *prop; + size_t buf_size; + int result; + + buf_size = (__dt_root_addr_cells + __dt_root_size_cells) * sizeof(u32); + prop = buf = vmalloc(buf_size); + if (!buf) + return -ENOMEM; + + fill_property(prop, addr, __dt_root_addr_cells); + prop += __dt_root_addr_cells * sizeof(u32); + + fill_property(prop, size, __dt_root_size_cells); + + result = fdt_setprop(fdt, nodeoffset, name, buf, buf_size); + + vfree(buf); + + return result; +} + int setup_dtb(struct kimage *image, unsigned long initrd_load_addr, unsigned long initrd_len, char *cmdline, unsigned long cmdline_len, @@ -60,10 +137,26 @@ int setup_dtb(struct kimage *image, int range_len; int ret; + /* check ranges against root's 
#address-cells and #size-cells */ + if (image->type == KEXEC_TYPE_CRASH && + (!cells_size_fitted(image->arch.elf_load_addr, + image->arch.elf_headers_sz) || +!cells_size_fitted(crashk_res.start, + crashk_res.end - crashk_res.start + 1))) { + pr_err("Crash memory
[PATCH v6 05/10] asm-generic: add kexec_file_load system call to unistd.h
The initial user of this system call number is arm64. Signed-off-by: AKASHI Takahiro Acked-by: Arnd Bergmann --- include/uapi/asm-generic/unistd.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 061185a5eb51..086697fe3917 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -731,9 +731,11 @@ __SYSCALL(__NR_pkey_alloc,sys_pkey_alloc) __SYSCALL(__NR_pkey_free, sys_pkey_free) #define __NR_statx 291 __SYSCALL(__NR_statx, sys_statx) +#define __NR_kexec_file_load 292 +__SYSCALL(__NR_kexec_file_load, sys_kexec_file_load) #undef __NR_syscalls -#define __NR_syscalls 292 +#define __NR_syscalls 293 /* * All syscalls below here should go away really, -- 2.14.1 ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
[PATCH v6 06/10] arm64: kexec_file: create purgatory
This is a basic purgatory, or a kind of glue code between the two kernels, for arm64. Since purgatory is assumed to be relocatable (not executable) object by kexec generic code, arch_kexec_apply_relocations_add() is required in general. Arm64's purgatory, however, is a simple asm and all the references can be resolved as local, no re-linking is needed here. Please note that even if we don't support digest check at purgatory we need purgatory_sha_regions and purgatory_sha256_digest as they are referenced by generic kexec code. Signed-off-by: AKASHI Takahiro Cc: Catalin Marinas Cc: Will Deacon --- arch/arm64/Makefile | 1 + arch/arm64/purgatory/Makefile | 24 +++ arch/arm64/purgatory/entry.S | 55 +++ 3 files changed, 80 insertions(+) create mode 100644 arch/arm64/purgatory/Makefile create mode 100644 arch/arm64/purgatory/entry.S diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 939b310913cf..cf39ec3baf5a 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -110,6 +110,7 @@ core-$(CONFIG_XEN) += arch/arm64/xen/ core-$(CONFIG_CRYPTO) += arch/arm64/crypto/ libs-y := arch/arm64/lib/ $(libs-y) core-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a +core-$(CONFIG_KEXEC_FILE) += arch/arm64/purgatory/ # Default target when executing plain make boot := arch/arm64/boot diff --git a/arch/arm64/purgatory/Makefile b/arch/arm64/purgatory/Makefile new file mode 100644 index ..c2127a2cbd51 --- /dev/null +++ b/arch/arm64/purgatory/Makefile @@ -0,0 +1,24 @@ +OBJECT_FILES_NON_STANDARD := y + +purgatory-y := entry.o + +targets += $(purgatory-y) +PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y)) + +LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined \ + -nostdlib -z nodefaultlib +targets += purgatory.ro + +$(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE + $(call if_changed,ld) + +targets += kexec_purgatory.c + +CMD_BIN2C = $(objtree)/scripts/basic/bin2c +quiet_cmd_bin2c = BIN2C $@ + cmd_bin2c = $(CMD_BIN2C) kexec_purgatory < $< > $@ + 
+$(obj)/kexec_purgatory.c: $(obj)/purgatory.ro FORCE + $(call if_changed,bin2c) + +obj-${CONFIG_KEXEC_FILE} += kexec_purgatory.o diff --git a/arch/arm64/purgatory/entry.S b/arch/arm64/purgatory/entry.S new file mode 100644 index ..fe6e968076db --- /dev/null +++ b/arch/arm64/purgatory/entry.S @@ -0,0 +1,55 @@ +/* + * kexec core purgatory + */ +#include +#include + +#define SHA256_DIGEST_SIZE 32 /* defined in crypto/sha.h */ + +.text + +ENTRY(purgatory_start) + /* Start new image. */ + ldr x17, __kernel_entry + ldr x0, __dtb_addr + mov x1, xzr + mov x2, xzr + mov x3, xzr + br x17 +END(purgatory_start) + +/* + * data section: + * kernel_entry and dtb_addr are global but also labelled as local, + * "__xxx:", to avoid unwanted re-linking. + * + * purgatory_sha_regions and purgatory_sha256_digest are referenced + * by kexec generic code and so must exist, but not actually used + * here because hash check is not that useful in purgatory. + */ +.align 3 + +.globl kernel_entry +kernel_entry: +__kernel_entry: + .quad 0 +END(kernel_entry) + +.globl dtb_addr +dtb_addr: +__dtb_addr: + .quad 0 +END(dtb_addr) + +.globl purgatory_sha_regions +purgatory_sha_regions: + .rept KEXEC_SEGMENT_MAX + .quad 0 + .quad 0 + .endr +END(purgatory_sha_regions) + +.globl purgatory_sha256_digest +purgatory_sha256_digest: +.skip SHA256_DIGEST_SIZE +END(purgatory_sha256_digest) -- 2.14.1 ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
[PATCH v6 04/10] kexec_file: factor out crashdump elf header function from x86
prepare_elf_headers() can also be useful for other architectures, including arm64. So let's factor it out. Signed-off-by: AKASHI Takahiro Cc: Dave Young Cc: Vivek Goyal Cc: Baoquan He --- arch/x86/kernel/crash.c | 324 include/linux/kexec.h | 17 +++ kernel/kexec_file.c | 308 + kernel/kexec_internal.h | 20 +++ 4 files changed, 345 insertions(+), 324 deletions(-) diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 44404e2307bb..3c6b880f6dbf 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #include @@ -41,34 +40,6 @@ /* Alignment required for elf header segment */ #define ELF_CORE_HEADER_ALIGN 4096 -/* This primarily represents number of split ranges due to exclusion */ -#define CRASH_MAX_RANGES 16 - -struct crash_mem_range { - u64 start, end; -}; - -struct crash_mem { - unsigned int nr_ranges; - struct crash_mem_range ranges[CRASH_MAX_RANGES]; -}; - -/* Misc data about ram ranges needed to prepare elf headers */ -struct crash_elf_data { - struct kimage *image; - /* -* Total number of ram ranges we have after various adjustments for -* crash reserved region, etc. 
-*/ - unsigned int max_nr_ranges; - - /* Pointer to elf header */ - void *ehdr; - /* Pointer to next phdr */ - void *bufp; - struct crash_mem mem; -}; - /* Used while preparing memory map entries for second kernel */ struct crash_memmap_data { struct boot_params *params; @@ -209,301 +180,6 @@ void native_machine_crash_shutdown(struct pt_regs *regs) } #ifdef CONFIG_KEXEC_FILE -static int get_nr_ram_ranges_callback(u64 start, u64 end, void *arg) -{ - unsigned int *nr_ranges = arg; - - (*nr_ranges)++; - return 0; -} - - -/* Gather all the required information to prepare elf headers for ram regions */ -static void fill_up_crash_elf_data(struct crash_elf_data *ced, - struct kimage *image) -{ - unsigned int nr_ranges = 0; - - ced->image = image; - - walk_system_ram_res(0, -1, &nr_ranges, - get_nr_ram_ranges_callback); - - ced->max_nr_ranges = nr_ranges; - - /* Exclusion of crash region could split memory ranges */ - ced->max_nr_ranges++; - - /* If crashk_low_res is not 0, another range split possible */ - if (crashk_low_res.end) - ced->max_nr_ranges++; -} - -static int exclude_mem_range(struct crash_mem *mem, - unsigned long long mstart, unsigned long long mend) -{ - int i, j; - unsigned long long start, end; - struct crash_mem_range temp_range = {0, 0}; - - for (i = 0; i < mem->nr_ranges; i++) { - start = mem->ranges[i].start; - end = mem->ranges[i].end; - - if (mstart > end || mend < start) - continue; - - /* Truncate any area outside of range */ - if (mstart < start) - mstart = start; - if (mend > end) - mend = end; - - /* Found completely overlapping range */ - if (mstart == start && mend == end) { - mem->ranges[i].start = 0; - mem->ranges[i].end = 0; - if (i < mem->nr_ranges - 1) { - /* Shift rest of the ranges to left */ - for (j = i; j < mem->nr_ranges - 1; j++) { - mem->ranges[j].start = - mem->ranges[j+1].start; - mem->ranges[j].end = - mem->ranges[j+1].end; - } - } - mem->nr_ranges--; - return 0; - } - - if (mstart > start && mend < end) { - /* Split original 
range */ - mem->ranges[i].end = mstart - 1; - temp_range.start = mend + 1; - temp_range.end = end; - } else if (mstart != start) - mem->ranges[i].end = mstart - 1; - else - mem->ranges[i].start = mend + 1; - break; - } - - /* If a split happend, add the split to array */ - if (!temp_range.end) - return 0; - - /* Split happened */ - if (i == CRASH_MAX_RANGES - 1) { - pr_err("Too many crash ranges after split\n"); - return -ENOMEM; - } - - /* Location where new range should go */ - j = i + 1; - if (j < mem->nr_ranges) { -
[PATCH v6 00/10] arm64: kexec: add kexec_file_load() support
This is the sixth round of implementing kexec_file_load() support on arm64.[1] Most of the code is based on kexec-tools (along with some kernel code from x86, which also came from kexec-tools). This patch series enables us to * load the kernel, Image, with kexec_file_load system call, and * optionally verify its signature at load time for trusted boot. To load the kernel via kexec_file_load system call, a small change is also needed to kexec-tools. See [2]. This enables '-s' option. (Please use v7.2.1+ crash for v4.14+ kernel) As we discussed a long time ago, users may not be allowed to specify device-tree file of the 2nd kernel explicitly with kexec-tools, hence re-using the blob of the first kernel. Regarding a signing method, we conform with x86 (or rather Microsoft?) style of signing since the binary can also be seen as in PE format (assuming that CONFIG_EFI is enabled). Powerpc is also going to support extended-file-attribute-based verification[3] with vmlinux, but arm64 doesn't for now partly because we don't have TPM-based IMA at this moment. Accordingly, we can use the existing command, sbsign, to sign the kernel. $ sbsign --key ${KEY} --cert ${CERT} Image Please note that it is totally up to the system what key/certificate is used for signing, but one of easy ways to *try* this feature is to turn on CONFIG_MODULE_SIG so that we can reuse certs/signing_key.pem as a signing key, KEY and CERT above, for kernel. (This also enables CONFIG_CRYPTO_SHA1 by default.) Some concerns(or future works): * Even if the kernel is configured with CONFIG_RANDOMIZE_BASE, the 2nd kernel won't be placed at a randomized address. We will have to add some boot code similar to efi-stub to implement the feature. * While big-endian kernel can support kernel signing, I'm not sure that Image can be recognized as in PE format because x86 standard only defines little-endian-based format. 
* IMA(and extended file attribute)-based kexec * vmlinux support [1] http://git.linaro.org/people/takahiro.akashi/linux-aarch64.git branch:arm64/kexec_file [2] http://git.linaro.org/people/takahiro.akashi/kexec-tools.git branch:arm64/kexec_file [3] http://lkml.iu.edu//hypermail/linux/kernel/1707.0/03669.html Changes in v6 (Oct 24, 2017) * fix a for-loop bug in _kexec_kernel_image_probe() per Julien Changes in v5 (Oct 10, 2017) * fix kbuild errors around patch #3 per Julien's comments, * fix a bug in walk_system_ram_res_rev() with some cleanup * modify fdt_setprop_range() to use vmalloc() * modify fill_property() to use memset() Changes in v4 (Oct 2, 2017) * reinstate x86's arch_kexec_kernel_image_load() * rename weak arch_kexec_kernel_xxx() to _kexec_kernel_xxx() for better re-use * constify kexec_file_loaders[] Changes in v3 (Sep 15, 2017) * fix kbuild test error * factor out arch_kexec_kernel_*() & arch_kimage_file_post_load_cleanup() * remove CONFIG_CRASH_CORE guard from kexec_file.c * add vmapped kernel region to vmcore for gdb backtracing (see prepare_elf64_headers()) * merge asm/kexec_file.h into asm/kexec.h * and some cleanups Changes in v2 (Sep 8, 2017) * move core-header-related functions from crash_core.c to kexec_file.c * drop hash-check code from purgatory * modify purgatory asm to remove arch_kexec_apply_relocations_add() * drop older kernel support * drop vmlinux support (at least, for this series) Patch #1 to #5 are all preparatory patches on generic side. Patch #6 is purgatory code. Patch #7 to #9 are common for enabling kexec_file_load. Patch #10 is for 'Image' support. 
AKASHI Takahiro (10): include: pe.h: remove message[] from mz header definition resource: add walk_system_ram_res_rev() kexec_file: factor out arch_kexec_kernel_*() from x86, powerpc kexec_file: factor out crashdump elf header function from x86 asm-generic: add kexec_file_load system call to unistd.h arm64: kexec_file: create purgatory arm64: kexec_file: load initrd, device-tree and purgatory segments arm64: kexec_file: set up for crash dump adding elf core header arm64: enable KEXEC_FILE config arm64: kexec_file: add Image format support arch/arm64/Kconfig | 29 +++ arch/arm64/Makefile | 1 + arch/arm64/include/asm/kexec.h | 93 +++ arch/arm64/kernel/Makefile | 4 +- arch/arm64/kernel/kexec_image.c | 105 arch/arm64/kernel/machine_kexec_file.c | 367 arch/arm64/purgatory/Makefile | 24 ++ arch/arm64/purgatory/entry.S| 55 + arch/powerpc/include/asm/kexec.h| 2 +- arch/powerpc/kernel/kexec_elf_64.c | 2 +- arch/powerpc/kernel/machine_kexec_file_64.c | 39 +-- arch/x86/include/asm/kexec-bzimage64.h | 2 +- arch/x86/kernel/crash.c | 324 arch/x86/kernel/kexec-bzimage64.c | 2 +- arch/x86/kernel/machine_kex
Re: [PATCH 3/3] kdump: round up the total memory size to 128M for crashkernel reservation
Hi Baoquan, On 10/24/17 at 01:57pm, Baoquan He wrote: > Hi Dave, > > On 10/24/17 at 01:31pm, Dave Young wrote: > > The total memory size we get in kernel is usually slightly less than 2G > > with a > > 2G memory module machine. The main reason is bios/firmware reserve some area > > it will not export all memory as usable to Linux. > > > > 2G memory X86 kvm guest test result of the total_mem value: > > UEFI boot with ovmf: 0x7ef1 > > Legacy boot kvm guest: 0x7ff7cc00 > > > > An option is to use dmi/smbios to get physical memory size, but it's not > > reliable as well. According to Prarit hardware vendors sometimes screw this > > up. > > Thus we choose to round up total size to 128M to workaround this problem. > > This is a best effort workaround, will improve it when we have better way > > in the future. > > Thanks for this posting. While I don't get the point of this patch. So > firmware take piece of memory, then why we need to count it into the > total memory which we want to calculate a crashkernel memory based on. > > Not counting that, is there anyting incorrect? Yes, considering crashkernel=1G-2G:128M, if we have a 1G memory machine, we get total size 1023M from firmware then it will not fall into 1G-2G thus no memory reserved. User will never know that, it is hard to let user to know the exact total value we get in kernel.. > > Thanks > Baoquan > > > > > Signed-off-by: Dave Young > > --- > > kernel/crash_core.c | 17 + > > 1 file changed, 13 insertions(+), 4 deletions(-) > > > > --- linux.orig/kernel/crash_core.c > > +++ linux/kernel/crash_core.c > > @@ -42,6 +42,15 @@ static int __init parse_crashkernel_mem( > > { > > char *cur = cmdline, *tmp; > > bool infinite_end = false; > > + unsigned long long total_mem = system_ram; > > + > > + /* > > +* Firmware usually reserves some memory regions for it's own use. > > +* so we get less than actual system memory size. 
> > +* We workaround this by round up the total size to 128M which is > > +* enough for most test cases. > > +*/ > > + total_mem = roundup(total_mem, 0x800); > > > > /* for each entry of the comma-separated list */ > > do { > > @@ -86,13 +95,13 @@ static int __init parse_crashkernel_mem( > > return -EINVAL; > > } > > cur = tmp; > > - if (size >= system_ram) { > > + if (size >= total_mem) { > > pr_warn("crashkernel: invalid size\n"); > > return -EINVAL; > > } > > > > /* match ? */ > > - if (system_ram >= start && system_ram < end) { > > + if (total_mem >= start && total_mem < end) { > > *crash_size = size; > > if (end == ULLONG_MAX) > > infinite_end = true; > > @@ -126,9 +135,9 @@ static int __init parse_crashkernel_mem( > > pr_warn("Memory reservation scale order > > expected after '^'\n"); > > return -EINVAL; > > } > > - size = (system_ram - *crash_size) >> shift; > > + size = (total_mem - *crash_size) >> shift; > > size = *crash_size + roundup(size, 1ULL << 20); > > - if (size < system_ram) > > + if (size < total_mem) > > *crash_size = size; > > cur = tmp; > > } else > > > > Thanks Dave ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
Re: [PATCH 1/3] kdump: extend crashkernel=range:size to dynamically increase reservation size
Hi Baoquan, On 10/24/17 at 02:00pm, Baoquan He wrote: > On 10/24/17 at 01:31pm, Dave Young wrote: > > crashkernel=range:size syntax allows to reserve specified size for system > > with total memory fall into the specified range. For example: > > crashkernel=2G-3G:128M,3G-:256M reserves 128M for system with memory >=2G > > and memory <3G, and reserves 256M for system with memory >= 3G > > > > In the above case 256M as a fixed value which can not fulfill very huge > > systems with large memory and IO devices. As memory size increases usually > > minimum memory requirement for booting will also increase. It is nearly > > impossible for a kernel to run with a fixed limited memory size for all > > kinds of systems. > > > > Thus extend the crashkernel=range:size to let user specify the scaling > > ratio in kernel cmdline like below: > > crashkernel=range:size^order, for example: > > crashkernel=2G-:128M^14 reserve 128M + (total_memory - 128M) >> 14 > > for machines with over 2G memory. > > Well, ah.., this look a little ugly and tricky. Leave this to other > reviewers to comment. But this is better than embed policy in kernel.. RHEL takes a crashkernel=auto which do the scaling. Leaving user to determine the scaling ratio is the better way. OTOH, this is an improvement to current crashkernel=range:size which can not scale the size before. 
> > > > > Also s/start-[end]/[start]-end/ in kernel-parameters.txt according to code > > > > Signed-off-by: Dave Young > > --- > > Documentation/admin-guide/kernel-parameters.txt | 10 -- > > Documentation/kdump/kdump.txt |7 ++-- > > kernel/crash_core.c | 35 > > +--- > > 3 files changed, 41 insertions(+), 11 deletions(-) > > > > --- linux-x86.orig/kernel/crash_core.c > > +++ linux-x86/kernel/crash_core.c > > @@ -31,7 +31,7 @@ static unsigned char *vmcoreinfo_data_sa > > /* > > * This function parses command lines in the format > > * > > - * crashkernel=ramsize-range:size[,...][@offset] > > + * crashkernel=ramsize-range:size[,...][@offset][^order] > > * > > * The function returns 0 on success and -EINVAL on failure. > > */ > > @@ -41,6 +41,7 @@ static int __init parse_crashkernel_mem( > > unsigned long long *crash_base) > > { > > char *cur = cmdline, *tmp; > > + bool infinite_end = false; > > > > /* for each entry of the comma-separated list */ > > do { > > @@ -93,13 +94,21 @@ static int __init parse_crashkernel_mem( > > /* match ? 
*/ > > if (system_ram >= start && system_ram < end) { > > *crash_size = size; > > + if (end == ULLONG_MAX) > > + infinite_end = true; > > break; > > } > > } while (*cur++ == ','); > > > > - if (*crash_size > 0) { > > - while (*cur && *cur != ' ' && *cur != '@') > > + if (*crash_size <= 0) > > + goto out; > > + > > + while (*cur && *cur != ' ') { > > + if (*cur != '@' && *cur != '^') { > > cur++; > > + continue; > > + } > > + > > if (*cur == '@') { > > cur++; > > *crash_base = memparse(cur, &tmp); > > @@ -107,10 +116,28 @@ static int __init parse_crashkernel_mem( > > pr_warn("Memory value expected after '@'\n"); > > return -EINVAL; > > } > > - } > > + cur = tmp; > > + } else if (*cur == '^' && infinite_end ) { > > + unsigned long long shift, size; > > + > > + cur++; > > + shift = memparse(cur, &tmp); > > + if (cur == tmp) { > > + pr_warn("Memory reservation scale order > > expected after '^'\n"); > > + return -EINVAL; > > + } > > + size = (system_ram - *crash_size) >> shift; > > + size = *crash_size + roundup(size, 1ULL << 20); > > + if (size < system_ram) > > + *crash_size = size; > > + cur = tmp; > > + } else > > + cur++; > > } > > > > return 0; > > +out: > > + return -EINVAL; > > } > > > > /* > > --- linux-x86.orig/Documentation/admin-guide/kernel-parameters.txt > > +++ linux-x86/Documentation/admin-guide/kernel-parameters.txt > > @@ -680,12 +680,14 @@ > > is selected automatically. Check > > Documentation/kdump/kdump.txt for further details. > > > > - crashkernel=range1:size1[,range2:size2,...][@offset] > > + crashkernel=range1:size1[,range2:size2,...][@offset][^order] > > [KNL] Same as above, but depends on the memory > > in the running system. The synta
Re: [PATCH 1/3] kdump: extend crashkernel=range:size to dynamically increase reservation size
On 10/24/17 at 01:31pm, Dave Young wrote: > crashkernel=range:size syntax allows to reserve specified size for system > with total memory fall into the specified range. For example: > crashkernel=2G-3G:128M,3G-:256M reserves 128M for system with memory >=2G > and memory <3G, and reserves 256M for system with memory >= 3G > > In the above case 256M as a fixed value which can not fulfill very huge > systems with large memory and IO devices. As memory size increases usually > minimum memory requirement for booting will also increase. It is nearly > impossible for a kernel to run with a fixed limited memory size for all > kinds of systems. > > Thus extend the crashkernel=range:size to let user specify the scaling > ratio in kernel cmdline like below: > crashkernel=range:size^order, for example: > crashkernel=2G-:128M^14 reserve 128M + (total_memory - 128M) >> 14 > for machines with over 2G memory. Well, ah.., this look a little ugly and tricky. Leave this to other reviewers to comment. > > Also s/start-[end]/[start]-end/ in kernel-parameters.txt according to code > > Signed-off-by: Dave Young > --- > Documentation/admin-guide/kernel-parameters.txt | 10 -- > Documentation/kdump/kdump.txt |7 ++-- > kernel/crash_core.c | 35 > +--- > 3 files changed, 41 insertions(+), 11 deletions(-) > > --- linux-x86.orig/kernel/crash_core.c > +++ linux-x86/kernel/crash_core.c > @@ -31,7 +31,7 @@ static unsigned char *vmcoreinfo_data_sa > /* > * This function parses command lines in the format > * > - * crashkernel=ramsize-range:size[,...][@offset] > + * crashkernel=ramsize-range:size[,...][@offset][^order] > * > * The function returns 0 on success and -EINVAL on failure. > */ > @@ -41,6 +41,7 @@ static int __init parse_crashkernel_mem( > unsigned long long *crash_base) > { > char *cur = cmdline, *tmp; > + bool infinite_end = false; > > /* for each entry of the comma-separated list */ > do { > @@ -93,13 +94,21 @@ static int __init parse_crashkernel_mem( > /* match ? 
*/ > if (system_ram >= start && system_ram < end) { > *crash_size = size; > + if (end == ULLONG_MAX) > + infinite_end = true; > break; > } > } while (*cur++ == ','); > > - if (*crash_size > 0) { > - while (*cur && *cur != ' ' && *cur != '@') > + if (*crash_size <= 0) > + goto out; > + > + while (*cur && *cur != ' ') { > + if (*cur != '@' && *cur != '^') { > cur++; > + continue; > + } > + > if (*cur == '@') { > cur++; > *crash_base = memparse(cur, &tmp); > @@ -107,10 +116,28 @@ static int __init parse_crashkernel_mem( > pr_warn("Memory value expected after '@'\n"); > return -EINVAL; > } > - } > + cur = tmp; > + } else if (*cur == '^' && infinite_end ) { > + unsigned long long shift, size; > + > + cur++; > + shift = memparse(cur, &tmp); > + if (cur == tmp) { > + pr_warn("Memory reservation scale order > expected after '^'\n"); > + return -EINVAL; > + } > + size = (system_ram - *crash_size) >> shift; > + size = *crash_size + roundup(size, 1ULL << 20); > + if (size < system_ram) > + *crash_size = size; > + cur = tmp; > + } else > + cur++; > } > > return 0; > +out: > + return -EINVAL; > } > > /* > --- linux-x86.orig/Documentation/admin-guide/kernel-parameters.txt > +++ linux-x86/Documentation/admin-guide/kernel-parameters.txt > @@ -680,12 +680,14 @@ > is selected automatically. Check > Documentation/kdump/kdump.txt for further details. > > - crashkernel=range1:size1[,range2:size2,...][@offset] > + crashkernel=range1:size1[,range2:size2,...][@offset][^order] > [KNL] Same as above, but depends on the memory > in the running system. The syntax of range is > - start-[end] where start and end are both > - a memory unit (amount[KMG]). See also > - Documentation/kdump/kdump.txt for an example. > + [start]-end where start and end are both > + a memory unit (amount[KMG]). In case the end of the > + range is infinity '^order' can be used to scale > +
Re: [PATCH 3/3] kdump: round up the total memory size to 128M for crashkernel reservation
Hi Dave, On 10/24/17 at 01:31pm, Dave Young wrote: > The total memory size we get in kernel is usually slightly less than 2G with a > 2G memory module machine. The main reason is bios/firmware reserve some area > it will not export all memory as usable to Linux. > > 2G memory X86 kvm guest test result of the total_mem value: > UEFI boot with ovmf: 0x7ef1 > Legacy boot kvm guest: 0x7ff7cc00 > > An option is to use dmi/smbios to get physical memory size, but it's not > reliable as well. According to Prarit hardware vendors sometimes screw this > up. > Thus we choose to round up total size to 128M to workaround this problem. > This is a best effort workaround, will improve it when we have better way > in the future. Thanks for this posting. While I don't get the point of this patch. So firmware take piece of memory, then why we need to count it into the total memory which we want to calculate a crashkernel memory based on. Not counting that, is there anyting incorrect? Thanks Baoquan > > Signed-off-by: Dave Young > --- > kernel/crash_core.c | 17 + > 1 file changed, 13 insertions(+), 4 deletions(-) > > --- linux.orig/kernel/crash_core.c > +++ linux/kernel/crash_core.c > @@ -42,6 +42,15 @@ static int __init parse_crashkernel_mem( > { > char *cur = cmdline, *tmp; > bool infinite_end = false; > + unsigned long long total_mem = system_ram; > + > + /* > + * Firmware usually reserves some memory regions for it's own use. > + * so we get less than actual system memory size. > + * We workaround this by round up the total size to 128M which is > + * enough for most test cases. > + */ > + total_mem = roundup(total_mem, 0x800); > > /* for each entry of the comma-separated list */ > do { > @@ -86,13 +95,13 @@ static int __init parse_crashkernel_mem( > return -EINVAL; > } > cur = tmp; > - if (size >= system_ram) { > + if (size >= total_mem) { > pr_warn("crashkernel: invalid size\n"); > return -EINVAL; > } > > /* match ? 
*/ > - if (system_ram >= start && system_ram < end) { > + if (total_mem >= start && total_mem < end) { > *crash_size = size; > if (end == ULLONG_MAX) > infinite_end = true; > @@ -126,9 +135,9 @@ static int __init parse_crashkernel_mem( > pr_warn("Memory reservation scale order > expected after '^'\n"); > return -EINVAL; > } > - size = (system_ram - *crash_size) >> shift; > + size = (total_mem - *crash_size) >> shift; > size = *crash_size + roundup(size, 1ULL << 20); > - if (size < system_ram) > + if (size < total_mem) > *crash_size = size; > cur = tmp; > } else > > ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
[PATCH 2/3] X86/kdump: crashkernel=X try to reserve below 896M first then below 4G and MAXMEM
Now crashkernel=X will fail if there's not enough memory at low region (below 896M) when trying to reserve large memory size. One can use crashkernel=xM,high to reserve it at high region (>4G) but it is more convenient to improve crashkernel=X to: - First try to reserve X below 896M (for being compatible with old kexec-tools). - If fails, try to reserve X below 4G (swiotlb need to stay below 4G). - If fails, try to reserve X from MAXMEM top down. It's more transparent and user-friendly. If crashkernel is large and the reserved is beyond 896M, old kexec-tools is not compatible with new kernel because old kexec-tools can not load kernel at high memory region, there was an old discussion below: https://lkml.org/lkml/2013/10/15/601 But actually the behavior is consistent during my test. Suppose old kernel fail to reserve memory at low areas, kdump does not work because no memory reserved. With this patch, suppose new kernel successfully reserved memory at high areas, old kexec-tools still fail to load kdump kernel (tested 2.0.2), so it is acceptable, no need to worry about the compatibility. Here is the test result (kexec-tools 2.0.2, no high memory load support): Crashkernel over 4G: # cat /proc/iomem|grep Crash be00-cdff : Crash kernel 21300-21eff : Crash kernel # ./kexec -p /boot/vmlinuz-`uname -r` Memory for crashkernel is not reserved Please reserve memory by passing "crashkernel=X@Y" parameter to the kernel Then try loading kdump kernel crashkernel: 896M-4G: # cat /proc/iomem|grep Crash 9600-cdef : Crash kernel # ./kexec -p /boot/vmlinuz-4.14.0-rc4+ ELF core (kcore) parse failed Cannot load /boot/vmlinuz-4.14.0-rc4+ Signed-off-by: Dave Young --- arch/x86/kernel/setup.c | 16 1 file changed, 16 insertions(+) --- linux-x86.orig/arch/x86/kernel/setup.c +++ linux-x86/arch/x86/kernel/setup.c @@ -568,6 +568,22 @@ static void __init reserve_crashkernel(v high ? 
CRASH_ADDR_HIGH_MAX : CRASH_ADDR_LOW_MAX, crash_size, CRASH_ALIGN); +#ifdef CONFIG_X86_64 + /* +* crashkernel=X reserve below 896M fails? Try below 4G +*/ + if (!high && !crash_base) + crash_base = memblock_find_in_range(CRASH_ALIGN, + (1ULL << 32), + crash_size, CRASH_ALIGN); + /* +* crashkernel=X reserve below 4G fails? Try MAXMEM +*/ + if (!high && !crash_base) + crash_base = memblock_find_in_range(CRASH_ALIGN, + CRASH_ADDR_HIGH_MAX, + crash_size, CRASH_ALIGN); +#endif if (!crash_base) { pr_info("crashkernel reservation failed - No suitable area found.\n"); return; ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
[PATCH 3/3] kdump: round up the total memory size to 128M for crashkernel reservation
The total memory size we get in kernel is usually slightly less than 2G with a 2G memory module machine. The main reason is bios/firmware reserve some area it will not export all memory as usable to Linux. 2G memory X86 kvm guest test result of the total_mem value: UEFI boot with ovmf: 0x7ef1 Legacy boot kvm guest: 0x7ff7cc00 An option is to use dmi/smbios to get physical memory size, but it's not reliable as well. According to Prarit hardware vendors sometimes screw this up. Thus we choose to round up total size to 128M to workaround this problem. This is a best effort workaround, will improve it when we have better way in the future. Signed-off-by: Dave Young --- kernel/crash_core.c | 17 + 1 file changed, 13 insertions(+), 4 deletions(-) --- linux.orig/kernel/crash_core.c +++ linux/kernel/crash_core.c @@ -42,6 +42,15 @@ static int __init parse_crashkernel_mem( { char *cur = cmdline, *tmp; bool infinite_end = false; + unsigned long long total_mem = system_ram; + + /* +* Firmware usually reserves some memory regions for it's own use. +* so we get less than actual system memory size. +* We workaround this by round up the total size to 128M which is +* enough for most test cases. +*/ + total_mem = roundup(total_mem, 0x800); /* for each entry of the comma-separated list */ do { @@ -86,13 +95,13 @@ static int __init parse_crashkernel_mem( return -EINVAL; } cur = tmp; - if (size >= system_ram) { + if (size >= total_mem) { pr_warn("crashkernel: invalid size\n"); return -EINVAL; } /* match ? 
*/ - if (system_ram >= start && system_ram < end) { + if (total_mem >= start && total_mem < end) { *crash_size = size; if (end == ULLONG_MAX) infinite_end = true; @@ -126,9 +135,9 @@ static int __init parse_crashkernel_mem( pr_warn("Memory reservation scale order expected after '^'\n"); return -EINVAL; } - size = (system_ram - *crash_size) >> shift; + size = (total_mem - *crash_size) >> shift; size = *crash_size + roundup(size, 1ULL << 20); - if (size < system_ram) + if (size < total_mem) *crash_size = size; cur = tmp; } else ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
[PATCH 1/3] kdump: extend crashkernel=range:size to dynamically increase reservation size
crashkernel=range:size syntax allows to reserve specified size for system with total memory fall into the specified range. For example: crashkernel=2G-3G:128M,3G-:256M reserves 128M for system with memory >=2G and memory <3G, and reserves 256M for system with memory >= 3G In the above case 256M as a fixed value which can not fulfill very huge systems with large memory and IO devices. As memory size increases usually minimum memory requirement for booting will also increase. It is nearly impossible for a kernel to run with a fixed limited memory size for all kinds of systems. Thus extend the crashkernel=range:size to let user specify the scaling ratio in kernel cmdline like below: crashkernel=range:size^order, for example: crashkernel=2G-:128M^14 reserve 128M + (total_memory - 128M) >> 14 for machines with over 2G memory. Also s/start-[end]/[start]-end/ in kernel-parameters.txt according to code Signed-off-by: Dave Young --- Documentation/admin-guide/kernel-parameters.txt | 10 -- Documentation/kdump/kdump.txt |7 ++-- kernel/crash_core.c | 35 +--- 3 files changed, 41 insertions(+), 11 deletions(-) --- linux-x86.orig/kernel/crash_core.c +++ linux-x86/kernel/crash_core.c @@ -31,7 +31,7 @@ static unsigned char *vmcoreinfo_data_sa /* * This function parses command lines in the format * - * crashkernel=ramsize-range:size[,...][@offset] + * crashkernel=ramsize-range:size[,...][@offset][^order] * * The function returns 0 on success and -EINVAL on failure. */ @@ -41,6 +41,7 @@ static int __init parse_crashkernel_mem( unsigned long long *crash_base) { char *cur = cmdline, *tmp; + bool infinite_end = false; /* for each entry of the comma-separated list */ do { @@ -93,13 +94,21 @@ static int __init parse_crashkernel_mem( /* match ? 
*/ if (system_ram >= start && system_ram < end) { *crash_size = size; + if (end == ULLONG_MAX) + infinite_end = true; break; } } while (*cur++ == ','); - if (*crash_size > 0) { - while (*cur && *cur != ' ' && *cur != '@') + if (*crash_size <= 0) + goto out; + + while (*cur && *cur != ' ') { + if (*cur != '@' && *cur != '^') { cur++; + continue; + } + if (*cur == '@') { cur++; *crash_base = memparse(cur, &tmp); @@ -107,10 +116,28 @@ static int __init parse_crashkernel_mem( pr_warn("Memory value expected after '@'\n"); return -EINVAL; } - } + cur = tmp; + } else if (*cur == '^' && infinite_end ) { + unsigned long long shift, size; + + cur++; + shift = memparse(cur, &tmp); + if (cur == tmp) { + pr_warn("Memory reservation scale order expected after '^'\n"); + return -EINVAL; + } + size = (system_ram - *crash_size) >> shift; + size = *crash_size + roundup(size, 1ULL << 20); + if (size < system_ram) + *crash_size = size; + cur = tmp; + } else + cur++; } return 0; +out: + return -EINVAL; } /* --- linux-x86.orig/Documentation/admin-guide/kernel-parameters.txt +++ linux-x86/Documentation/admin-guide/kernel-parameters.txt @@ -680,12 +680,14 @@ is selected automatically. Check Documentation/kdump/kdump.txt for further details. - crashkernel=range1:size1[,range2:size2,...][@offset] + crashkernel=range1:size1[,range2:size2,...][@offset][^order] [KNL] Same as above, but depends on the memory in the running system. The syntax of range is - start-[end] where start and end are both - a memory unit (amount[KMG]). See also - Documentation/kdump/kdump.txt for an example. + [start]-end where start and end are both + a memory unit (amount[KMG]). In case the end of the + range is infinity '^order' can be used to scale + the size to size + (total_mem - start) >> 2^order + See also Documentation/kdump/kdump.txt for an example. crashkernel=size[KMG],high [KNL, x86_64] range could be above 4G. Allow kernel --- li
[PATCH 0/3] kdump: crashkernel parameter improvement
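Hi, Here is an attempt to improve the current crashkernel kernel parameter. Patch 1/3 adds extra functionality so that one can use, for example, crashkernel=2G-:128M^12 to reserve 128M on a 2G+ machine but also scale the size based on system memory, that is, 128M + ((total_mem - 128M) >> 12). Patch 2/3 is a resend of a previous post to let crashkernel=xM try to reserve first from below 896M, then below 4G, then from MAXMEM top down. Patch 3/3 rounds up the total memory size to 128M for the crashkernel reservation. (The end of this message was lost in archive extraction; the last two sentences are reconstructed from the subjects of patches 2/3 and 3/3.) http://lists.infradead.org/mailman/listinfo/kexec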
Re: x86/kdump: crashkernel=X try to reserve below 896M first then below 4G and MAXMEM
On 10/20/17 at 01:52pm, Dave Young wrote: > Now crashkernel=X will fail if there's not enough memory at low region > (below 896M) when trying to reserve large memory size. One can use > crashkernel=xM,high to reserve it at high region (>4G) but it is more > convinient to improve crashkernel=X to: > > - First try to reserve X below 896M (for being compatible with old >kexec-tools). > - If fails, try to reserve X below 4G (swiotlb need to stay below 4G). > - If fails, try to reserve X from MAXMEM top down. > > It's more transparent and user-friendly. > > If crashkernel is large and the reserved is beyond 896M, old kexec-tools > is not compatible with new kernel because old kexec-tools can not load > kernel at high memory region, there was an old discussion below > (previously posted by Chao Wang): > https://lkml.org/lkml/2013/10/15/601 > > But actually the behavior is consistent during my test. Suppose > old kernel fail to reserve memory at low areas, kdump does not > work because no memory reserved. With this patch, suppose new kernel > successfully reserved memory at high areas, old kexec-tools still fail > to load kdump kernel (tested 2.0.2), so it is acceptable, no need to > worry about the compatibility. 
> > Here is the test result (kexec-tools 2.0.2, no high memory load > support): > Crashkernel over 4G: > # cat /proc/iomem|grep Crash > be00-cdff : Crash kernel > 21300-21eff : Crash kernel > # ./kexec -p /boot/vmlinuz-`uname -r` > Memory for crashkernel is not reserved > Please reserve memory by passing "crashkernel=X@Y" parameter to the kernel > Then try loading kdump kernel > > crashkernel: 896M-4G: > # cat /proc/iomem|grep Crash > 9600-cdef : Crash kernel > # ./kexec -p /boot/vmlinuz-4.14.0-rc4+ > ELF core (kcore) parse failed > Cannot load /boot/vmlinuz-4.14.0-rc4+ > > Signed-off-by: Dave Young > --- > arch/x86/kernel/setup.c | 16 > 1 file changed, 16 insertions(+) > > --- linux-x86.orig/arch/x86/kernel/setup.c > +++ linux-x86/arch/x86/kernel/setup.c > @@ -568,6 +568,22 @@ static void __init reserve_crashkernel(v > high ? CRASH_ADDR_HIGH_MAX >: CRASH_ADDR_LOW_MAX, > crash_size, CRASH_ALIGN); > +#ifdef CONFIG_X86_64 > + /* > + * crashkernel=X reserve below 896M fails? Try below 4G > + */ > + if (!high && !crash_base) > + crash_base = memblock_find_in_range(CRASH_ALIGN, > + (1ULL << 32), > + crash_size, CRASH_ALIGN); > + /* > + * crashkernel=X reserve below 4G fails? Try MAXMEM > + */ > + if (!high && !crash_base) > + crash_base = memblock_find_in_range(CRASH_ALIGN, > + CRASH_ADDR_HIGH_MAX, > + crash_size, CRASH_ALIGN); > +#endif > if (!crash_base) { > pr_info("crashkernel reservation failed - No suitable > area found.\n"); > return; Hmm, forgot to add [PATCH] prefix, will resend this along with other two crashkernel patches. Thanks Dave ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
[PATCH 2/2] ARM: read kernel size from zImage
Read the new extension data which tells the boot agent about the requirements for booting the kernel image, such as how much RAM will be consumed by the kernel through decompression and booting. This is necessary to control the placement of the DTB and compressed RAM disk to avoid these objects being corrupted. Tested-by: Tony Lindgren Signed-off-by: Russell King --- kexec/arch/arm/kexec-zImage-arm.c | 51 ++- 1 file changed, 50 insertions(+), 1 deletion(-) diff --git a/kexec/arch/arm/kexec-zImage-arm.c b/kexec/arch/arm/kexec-zImage-arm.c index c6ecb04..a8c40cb 100644 --- a/kexec/arch/arm/kexec-zImage-arm.c +++ b/kexec/arch/arm/kexec-zImage-arm.c @@ -34,6 +34,15 @@ struct zimage_header { #define ZIMAGE_MAGIC cpu_to_le32(0x016f2818) uint32_t start; uint32_t end; + uint32_t endian; + + /* Extension to the data passed to the boot agent. The offset +* points at a tagged table following a similar format to the +* ATAGs. +*/ + uint32_t magic2; +#define ZIMAGE_MAGIC2 (0x45454545) + uint32_t extension_tag_offset; }; struct android_image { @@ -114,6 +123,17 @@ struct tag { #define byte_size(t)((t)->hdr.size << 2) #define tag_size(type) ((sizeof(struct tag_header) + sizeof(struct type) + 3) >> 2) +struct zimage_tag { + struct tag_header hdr; + union { +#define ZIMAGE_TAG_KRNL_SIZE cpu_to_le32(0x5a534c4b) + struct zimage_krnl_size { + uint32_t size_ptr; + uint32_t bss_size; + } krnl_size; + } u; +}; + int zImage_arm_probe(const char *UNUSED(buf), off_t UNUSED(len)) { /* @@ -434,7 +454,7 @@ int zImage_arm_load(int argc, char **argv, const char *buf, off_t len, if (dtb_file) dtb_buf = slurp_file(dtb_file, &dtb_length); - if (len > 0x34) { + if (len > sizeof(struct zimage_header)) { const struct zimage_header *hdr; off_t size; @@ -460,6 +480,35 @@ int zImage_arm_load(int argc, char **argv, const char *buf, off_t len, if (size < len) len = size; } + + /* Do we have an extension table? 
*/ + if (hdr->magic2 == ZIMAGE_MAGIC2 && !kexec_arm_image_size) { + uint32_t offset = hdr->extension_tag_offset; + uint32_t max = len - sizeof(struct tag_header); + struct zimage_tag *tag; + + dbgprintf("zImage has tags\n"); + + for (offset = hdr->extension_tag_offset; +(tag = (void *)(buf + offset)) != NULL && +offset < max && byte_size(tag) && + offset + byte_size(tag) < len; +offset += byte_size(tag)) { + dbgprintf(" offset 0x%08x tag 0x%08x size %u\n", + offset, tag->hdr.tag, byte_size(tag)); + if (tag->hdr.tag == ZIMAGE_TAG_KRNL_SIZE) { + uint32_t *p = (void *)buf + + tag->u.krnl_size.size_ptr; + + kexec_arm_image_size = + get_unaligned(p) + + tag->u.krnl_size.bss_size; + } + } + + dbgprintf("kernel image size: 0x%08x\n", + kexec_arm_image_size); + } } /* Handle android images, 2048 is the minimum page size */ -- 2.7.4 -- RMK's Patch system: http://www.armlinux.org.uk/developer/patches/ FTTC broadband for 0.8mile line in suburbia: sync at 8.8Mbps down 630kbps up According to speedtest.net: 8.21Mbps down 510kbps up ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
[PATCH 1/2] ARM: cleanup initrd and dtb handling
There is no difference in the way the initrd is handled between an ATAG-based kernel and a DTB-based kernel. Therefore, this should be handled identically in both cases. Rearrange the code to achieve this. Signed-off-by: Russell King --- kexec/arch/arm/kexec-zImage-arm.c | 116 +++--- 1 file changed, 57 insertions(+), 59 deletions(-) diff --git a/kexec/arch/arm/kexec-zImage-arm.c b/kexec/arch/arm/kexec-zImage-arm.c index 7f02b93..c6ecb04 100644 --- a/kexec/arch/arm/kexec-zImage-arm.c +++ b/kexec/arch/arm/kexec-zImage-arm.c @@ -24,7 +24,7 @@ #define BOOT_PARAMS_SIZE 1536 -off_t initrd_base = 0, initrd_size = 0; +off_t initrd_base, initrd_size; unsigned int kexec_arm_image_size = 0; unsigned long long user_page_offset = (-1ULL); @@ -200,14 +200,12 @@ int create_mem32_tag(struct tag_mem32 *tag_mem32) static int atag_arm_load(struct kexec_info *info, unsigned long base, - const char *command_line, off_t command_line_len, - const char *initrd, off_t initrd_len, off_t initrd_off) + const char *command_line, off_t command_line_len, const char *initrd) { struct tag *saved_tags = atag_read_tags(); char *buf; off_t len; struct tag *params; - uint32_t *initrd_start = NULL; buf = xmalloc(getpagesize()); if (!buf) { @@ -251,8 +249,8 @@ int atag_arm_load(struct kexec_info *info, unsigned long base, if (initrd) { params->hdr.size = tag_size(tag_initrd); params->hdr.tag = ATAG_INITRD2; - initrd_start = &params->u.initrd.start; - params->u.initrd.size = initrd_len; + params->u.initrd.start = initrd_base; + params->u.initrd.size = initrd_size; params = tag_next(params); } @@ -272,14 +270,6 @@ int atag_arm_load(struct kexec_info *info, unsigned long base, add_segment(info, buf, len, base, len); - if (initrd) { - *initrd_start = locate_hole(info, initrd_len, getpagesize(), - initrd_off, ULONG_MAX, INT_MAX); - if (*initrd_start == ULONG_MAX) - return -1; - add_segment(info, initrd, initrd_len, *initrd_start, initrd_len); - } - return 0; } @@ -348,6 +338,7 @@ static int setup_dtb_prop(char 
**bufp, off_t *sizep, int parentoffset, int zImage_arm_load(int argc, char **argv, const char *buf, off_t len, struct kexec_info *info) { + unsigned long page_size = getpagesize(); unsigned long base, kernel_base; unsigned int atag_offset = 0x1000; /* 4k offset from memory start */ unsigned int extra_size = 0x8000; /* TEXT_OFFSET */ @@ -513,6 +504,14 @@ int zImage_arm_load(int argc, char **argv, const char *buf, off_t len, kernel_mem_size = len + 4; /* +* If the user didn't specify the size of the image, assume the +* maximum kernel compression ratio is 4. Note that we must +* include space for the compressed image here as well. +*/ + if (!kexec_arm_image_size) + kexec_arm_image_size = len * 5; + + /* * If we are loading a dump capture kernel, we need to update kernel * command line and also add some additional segments. */ @@ -562,17 +561,28 @@ int zImage_arm_load(int argc, char **argv, const char *buf, off_t len, kernel_base = base + extra_size; - if (kexec_arm_image_size) { - /* If the image size was passed as command line argument, -* use that value for determining the address for initrd, -* atags and dtb images. page-align the given length.*/ - initrd_base = kernel_base + _ALIGN(kexec_arm_image_size, getpagesize()); - } else { - /* Otherwise, assume the maximum kernel compression ratio -* is 4, and just to be safe, place ramdisk after that. -* Note that we must include space for the compressed -* image here as well. */ - initrd_base = kernel_base + _ALIGN(len * 5, getpagesize()); + /* +* Calculate the minimum address of the initrd, which must be +* above the memory used by the zImage while it runs. This +* needs to be page-size aligned. +*/ + initrd_base = kernel_base + _ALIGN(kexec_arm_image_size, page_size); + + if (ramdisk_buf) { + /* +* Find a hole to place the initrd. The crash kernel use +* fixed address, so no check is ok. 
+*/ + if (!(info->kexec_flags & KEXEC_ON_CRASH)) { + initrd_base = locate_hole(info, initrd_size, page_size, + initrd_base, + ULONG_MAX, INT_MAX); + if (initrd_base == UL