[PATCH v6 03/10] kexec_file: factor out arch_kexec_kernel_*() from x86, powerpc

2017-10-23 Thread AKASHI Takahiro
arch_kexec_kernel_*() and arch_kimage_file_post_load_cleanup can now be
duplicated among some architectures, so let's factor them out.

Signed-off-by: AKASHI Takahiro 
Cc: Dave Young 
Cc: Vivek Goyal 
Cc: Baoquan He 
Cc: Michael Ellerman 
Cc: Thiago Jung Bauermann 
---
 arch/powerpc/include/asm/kexec.h|  2 +-
 arch/powerpc/kernel/kexec_elf_64.c  |  2 +-
 arch/powerpc/kernel/machine_kexec_file_64.c | 39 ++--
 arch/x86/include/asm/kexec-bzimage64.h  |  2 +-
 arch/x86/kernel/kexec-bzimage64.c   |  2 +-
 arch/x86/kernel/machine_kexec_64.c  | 45 +--
 include/linux/kexec.h   | 15 
 kernel/kexec_file.c | 57 +++--
 8 files changed, 70 insertions(+), 94 deletions(-)

diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
index 25668bc8cb2a..23588952d8bd 100644
--- a/arch/powerpc/include/asm/kexec.h
+++ b/arch/powerpc/include/asm/kexec.h
@@ -92,7 +92,7 @@ static inline bool kdump_in_progress(void)
 }
 
 #ifdef CONFIG_KEXEC_FILE
-extern struct kexec_file_ops kexec_elf64_ops;
+extern const struct kexec_file_ops kexec_elf64_ops;
 
 #ifdef CONFIG_IMA_KEXEC
 #define ARCH_HAS_KIMAGE_ARCH
diff --git a/arch/powerpc/kernel/kexec_elf_64.c 
b/arch/powerpc/kernel/kexec_elf_64.c
index 9a42309b091a..6c78c11c7faf 100644
--- a/arch/powerpc/kernel/kexec_elf_64.c
+++ b/arch/powerpc/kernel/kexec_elf_64.c
@@ -657,7 +657,7 @@ static void *elf64_load(struct kimage *image, char 
*kernel_buf,
return ret ? ERR_PTR(ret) : fdt;
 }
 
-struct kexec_file_ops kexec_elf64_ops = {
+const struct kexec_file_ops kexec_elf64_ops = {
.probe = elf64_probe,
.load = elf64_load,
 };
diff --git a/arch/powerpc/kernel/machine_kexec_file_64.c 
b/arch/powerpc/kernel/machine_kexec_file_64.c
index 992c0d258e5d..cd2a7671f5c6 100644
--- a/arch/powerpc/kernel/machine_kexec_file_64.c
+++ b/arch/powerpc/kernel/machine_kexec_file_64.c
@@ -31,52 +31,19 @@
 
 #define SLAVE_CODE_SIZE256
 
-static struct kexec_file_ops *kexec_file_loaders[] = {
+const struct kexec_file_ops * const kexec_file_loaders[] = {
&kexec_elf64_ops,
+   NULL
 };
 
 int arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
  unsigned long buf_len)
 {
-   int i, ret = -ENOEXEC;
-   struct kexec_file_ops *fops;
-
/* We don't support crash kernels yet. */
if (image->type == KEXEC_TYPE_CRASH)
return -ENOTSUPP;
 
-   for (i = 0; i < ARRAY_SIZE(kexec_file_loaders); i++) {
-   fops = kexec_file_loaders[i];
-   if (!fops || !fops->probe)
-   continue;
-
-   ret = fops->probe(buf, buf_len);
-   if (!ret) {
-   image->fops = fops;
-   return ret;
-   }
-   }
-
-   return ret;
-}
-
-void *arch_kexec_kernel_image_load(struct kimage *image)
-{
-   if (!image->fops || !image->fops->load)
-   return ERR_PTR(-ENOEXEC);
-
-   return image->fops->load(image, image->kernel_buf,
-image->kernel_buf_len, image->initrd_buf,
-image->initrd_buf_len, image->cmdline_buf,
-image->cmdline_buf_len);
-}
-
-int arch_kimage_file_post_load_cleanup(struct kimage *image)
-{
-   if (!image->fops || !image->fops->cleanup)
-   return 0;
-
-   return image->fops->cleanup(image->image_loader_data);
+   return _kexec_kernel_image_probe(image, buf, buf_len);
 }
 
 /**
diff --git a/arch/x86/include/asm/kexec-bzimage64.h 
b/arch/x86/include/asm/kexec-bzimage64.h
index d1b5d194e31d..284fd23d133b 100644
--- a/arch/x86/include/asm/kexec-bzimage64.h
+++ b/arch/x86/include/asm/kexec-bzimage64.h
@@ -1,6 +1,6 @@
 #ifndef _ASM_KEXEC_BZIMAGE64_H
 #define _ASM_KEXEC_BZIMAGE64_H
 
-extern struct kexec_file_ops kexec_bzImage64_ops;
+extern const struct kexec_file_ops kexec_bzImage64_ops;
 
 #endif  /* _ASM_KEXE_BZIMAGE64_H */
diff --git a/arch/x86/kernel/kexec-bzimage64.c 
b/arch/x86/kernel/kexec-bzimage64.c
index fb095ba0c02f..705654776c0c 100644
--- a/arch/x86/kernel/kexec-bzimage64.c
+++ b/arch/x86/kernel/kexec-bzimage64.c
@@ -538,7 +538,7 @@ static int bzImage64_verify_sig(const char *kernel, 
unsigned long kernel_len)
 }
 #endif
 
-struct kexec_file_ops kexec_bzImage64_ops = {
+const struct kexec_file_ops kexec_bzImage64_ops = {
.probe = bzImage64_probe,
.load = bzImage64_load,
.cleanup = bzImage64_cleanup,
diff --git a/arch/x86/kernel/machine_kexec_64.c 
b/arch/x86/kernel/machine_kexec_64.c
index 1f790cf9d38f..2cdd29d64181 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -30,8 +30,9 @@
 #include 
 
 #ifdef CONFIG_KEXEC_FILE
-static struct kexec_file_ops *kexec_file_loaders[] = {
+const struct kexec_file_ops * cons

[PATCH v6 10/10] arm64: kexec_file: add Image format support

2017-10-23 Thread AKASHI Takahiro
The "Image" binary will be loaded at the offset of TEXT_OFFSET from
the start of system memory. TEXT_OFFSET is determined from the header
of the image.

Regarding kernel signature verification, it will be done through
verify_pefile_signature() as arm64's "Image" binary can be seen as being
in PE format. This approach is consistent with the x86 implementation.

We can sign an image with the sbsign command.

Signed-off-by: AKASHI Takahiro 
Cc: Catalin Marinas 
Cc: Will Deacon 
---
 arch/arm64/Kconfig |   7 +++
 arch/arm64/include/asm/kexec.h |  66 +
 arch/arm64/kernel/Makefile |   1 +
 arch/arm64/kernel/kexec_image.c| 105 +
 arch/arm64/kernel/machine_kexec_file.c |   3 +
 5 files changed, 182 insertions(+)
 create mode 100644 arch/arm64/kernel/kexec_image.c

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index e37be8a59a88..a9ef277faa3e 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -770,10 +770,17 @@ config KEXEC_FILE
  In addition to this option, you need to enable a specific type
  of image support.
 
+config KEXEC_FILE_IMAGE_FMT
+   bool "Enable Image support"
+   depends on KEXEC_FILE
+   ---help---
+ Select this option to enable 'Image' kernel loading.
+
 config KEXEC_VERIFY_SIG
bool "Verify kernel signature during kexec_file_load() syscall"
depends on KEXEC_FILE
select SYSTEM_DATA_VERIFICATION
+   select SIGNED_PE_FILE_VERIFICATION if KEXEC_FILE_IMAGE_FMT
---help---
  Select this option to verify a signature with loaded kernel
  image. If configured, any attempt of loading a image without
diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h
index edb702e64a8a..2a63bf5f32ea 100644
--- a/arch/arm64/include/asm/kexec.h
+++ b/arch/arm64/include/asm/kexec.h
@@ -104,6 +104,72 @@ struct kimage_arch {
unsigned long elf_load_addr;
 };
 
+/**
+ * struct arm64_image_header - arm64 kernel image header
+ *
+ * @pe_sig: Optional PE format 'MZ' signature
+ * @branch_code: Instruction to branch to stext
+ * @text_offset: Image load offset, little endian
+ * @image_size: Effective image size, little endian
+ * @flags:
+ * Bit 0: Kernel endianness. 0=little endian, 1=big endian
+ * @reserved: Reserved
+ * @magic: Magic number, "ARM\x64"
+ * @pe_header: Optional offset to a PE format header
+ **/
+
+struct arm64_image_header {
+   u8 pe_sig[2];
+   u8 pad[2];
+   u32 branch_code;
+   u64 text_offset;
+   u64 image_size;
+   u64 flags;
+   u64 reserved[3];
+   u8 magic[4];
+   u32 pe_header;
+};
+
+static const u8 arm64_image_magic[4] = {'A', 'R', 'M', 0x64U};
+static const u8 arm64_image_pe_sig[2] = {'M', 'Z'};
+
+/**
+ * arm64_header_check_magic - Helper to check the arm64 image header.
+ *
+ * Returns non-zero if header is OK.
+ */
+
+static inline int arm64_header_check_magic(const struct arm64_image_header *h)
+{
+   if (!h)
+   return 0;
+
+   if (!h->text_offset)
+   return 0;
+
+   return (h->magic[0] == arm64_image_magic[0]
+   && h->magic[1] == arm64_image_magic[1]
+   && h->magic[2] == arm64_image_magic[2]
+   && h->magic[3] == arm64_image_magic[3]);
+}
+
+/**
+ * arm64_header_check_pe_sig - Helper to check the arm64 image header.
+ *
+ * Returns non-zero if 'MZ' signature is found.
+ */
+
+static inline int arm64_header_check_pe_sig(const struct arm64_image_header *h)
+{
+   if (!h)
+   return 0;
+
+   return (h->pe_sig[0] == arm64_image_pe_sig[0]
+   && h->pe_sig[1] == arm64_image_pe_sig[1]);
+}
+
+extern const struct kexec_file_ops kexec_image_ops;
+
 struct kimage;
 
 #define arch_kimage_file_post_load_cleanup arch_kimage_file_post_load_cleanup
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 5df003d6157c..a1161bab6810 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -51,6 +51,7 @@ arm64-obj-$(CONFIG_HIBERNATION)   += hibernate.o 
hibernate-asm.o
 arm64-obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o
\
   cpu-reset.o
 arm64-obj-$(CONFIG_KEXEC_FILE) += machine_kexec_file.o
+arm64-obj-$(CONFIG_KEXEC_FILE_IMAGE_FMT)   += kexec_image.o
 arm64-obj-$(CONFIG_ARM64_RELOC_TEST)   += arm64-reloc-test.o
 arm64-reloc-test-y := reloc_test_core.o reloc_test_syms.o
 arm64-obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
diff --git a/arch/arm64/kernel/kexec_image.c b/arch/arm64/kernel/kexec_image.c
new file mode 100644
index ..b840b6ed6ed9
--- /dev/null
+++ b/arch/arm64/kernel/kexec_image.c
@@ -0,0 +1,105 @@
+/*
+ * Kexec image loader
+
+ * Copyright (C) 2017 Linaro Limited
+ * Author: AKASHI Takahiro 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the 

[PATCH v6 02/10] resource: add walk_system_ram_res_rev()

2017-10-23 Thread AKASHI Takahiro
This function, being a variant of walk_system_ram_res() introduced in
commit 8c86e70acead ("resource: provide new functions to walk through
resources"), walks through a list of all the resources of System RAM
in reversed order, i.e., from higher to lower.

It will be used in kexec_file implementation on arm64.

Signed-off-by: AKASHI Takahiro 
Cc: Vivek Goyal 
Cc: Andrew Morton 
Cc: Linus Torvalds 
---
 include/linux/ioport.h |  3 +++
 kernel/resource.c  | 57 ++
 2 files changed, 60 insertions(+)

diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index f5cf32e80041..62eb62b98118 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -273,6 +273,9 @@ extern int
 walk_system_ram_res(u64 start, u64 end, void *arg,
int (*func)(u64, u64, void *));
 extern int
+walk_system_ram_res_rev(u64 start, u64 end, void *arg,
+   int (*func)(u64, u64, void *));
+extern int
 walk_iomem_res_desc(unsigned long desc, unsigned long flags, u64 start, u64 
end,
void *arg, int (*func)(u64, u64, void *));
 
diff --git a/kernel/resource.c b/kernel/resource.c
index 9b5f04404152..69c31454cb2e 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -23,6 +23,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 #include 
 
 
@@ -469,6 +471,61 @@ int walk_system_ram_res(u64 start, u64 end, void *arg,
return ret;
 }
 
+int walk_system_ram_res_rev(u64 start, u64 end, void *arg,
+   int (*func)(u64, u64, void *))
+{
+   struct resource res, *rams;
+   int rams_size = 16, i;
+   int ret = -1;
+
+   /* create a list */
+   rams = vmalloc(sizeof(struct resource) * rams_size);
+   if (!rams)
+   return ret;
+
+   res.start = start;
+   res.end = end;
+   res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
+   i = 0;
+   while ((res.start < res.end) &&
+   (!find_next_iomem_res(&res, IORES_DESC_NONE, true))) {
+   if (i >= rams_size) {
+   /* re-alloc */
+   struct resource *rams_new;
+   int rams_new_size;
+
+   rams_new_size = rams_size + 16;
+   rams_new = vmalloc(sizeof(struct resource)
+   * rams_new_size);
+   if (!rams_new)
+   goto out;
+
+   memcpy(rams_new, rams,
+   sizeof(struct resource) * rams_size);
+   vfree(rams);
+   rams = rams_new;
+   rams_size = rams_new_size;
+   }
+
+   rams[i].start = res.start;
+   rams[i++].end = res.end;
+
+   res.start = res.end + 1;
+   res.end = end;
+   }
+
+   /* go reverse */
+   for (i--; i >= 0; i--) {
+   ret = (*func)(rams[i].start, rams[i].end, arg);
+   if (ret)
+   break;
+   }
+
+out:
+   vfree(rams);
+   return ret;
+}
+
 #if !defined(CONFIG_ARCH_HAS_WALK_MEMORY)
 
 /*
-- 
2.14.1


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v6 09/10] arm64: enable KEXEC_FILE config

2017-10-23 Thread AKASHI Takahiro
Modify arm64/Kconfig and Makefile to enable kexec_file_load support.

Signed-off-by: AKASHI Takahiro 
Cc: Catalin Marinas 
Cc: Will Deacon 
---
 arch/arm64/Kconfig | 22 ++
 1 file changed, 22 insertions(+)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 0df64a6a56d4..e37be8a59a88 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -757,6 +757,28 @@ config KEXEC
  but it is independent of the system firmware.   And like a reboot
  you can start any kernel with it, not just Linux.
 
+config KEXEC_FILE
+   bool "kexec file based system call"
+   select KEXEC_CORE
+   select BUILD_BIN2C
+   ---help---
+ This is new version of kexec system call. This system call is
+ file based and takes file descriptors as system call argument
+ for kernel and initramfs as opposed to list of segments as
+ accepted by previous system call.
+
+ In addition to this option, you need to enable a specific type
+ of image support.
+
+config KEXEC_VERIFY_SIG
+   bool "Verify kernel signature during kexec_file_load() syscall"
+   depends on KEXEC_FILE
+   select SYSTEM_DATA_VERIFICATION
+   ---help---
+ Select this option to verify a signature with loaded kernel
+ image. If configured, any attempt of loading a image without
+ valid signature will fail.
+
 config CRASH_DUMP
bool "Build kdump crash kernel"
help
-- 
2.14.1


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v6 07/10] arm64: kexec_file: load initrd, device-tree and purgatory segments

2017-10-23 Thread AKASHI Takahiro
load_other_segments() sets up and adds all the memory segments necessary
other than kernel, including initrd, device-tree blob and purgatory.
Most of the code was borrowed from kexec-tools' counterpart.

arch_kimage_file_post_load_cleanup() is meant to free arm64-specific data
allocated for loading the kernel.

Signed-off-by: AKASHI Takahiro 
Cc: Catalin Marinas 
Cc: Will Deacon 
---
 arch/arm64/include/asm/kexec.h |  22 
 arch/arm64/kernel/Makefile |   3 +-
 arch/arm64/kernel/machine_kexec_file.c | 213 +
 3 files changed, 237 insertions(+), 1 deletion(-)
 create mode 100644 arch/arm64/kernel/machine_kexec_file.c

diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h
index e17f0529a882..2fadd3cbf3af 100644
--- a/arch/arm64/include/asm/kexec.h
+++ b/arch/arm64/include/asm/kexec.h
@@ -93,6 +93,28 @@ static inline void crash_prepare_suspend(void) {}
 static inline void crash_post_resume(void) {}
 #endif
 
+#ifdef CONFIG_KEXEC_FILE
+#define ARCH_HAS_KIMAGE_ARCH
+
+struct kimage_arch {
+   void *dtb_buf;
+};
+
+struct kimage;
+
+#define arch_kimage_file_post_load_cleanup arch_kimage_file_post_load_cleanup
+extern int arch_kimage_file_post_load_cleanup(struct kimage *image);
+
+extern int setup_dtb(struct kimage *image,
+   unsigned long initrd_load_addr, unsigned long initrd_len,
+   char *cmdline, unsigned long cmdline_len,
+   char **dtb_buf, size_t *dtb_buf_len);
+extern int load_other_segments(struct kimage *image,
+   unsigned long kernel_load_addr,
+   char *initrd, unsigned long initrd_len,
+   char *cmdline, unsigned long cmdline_len);
+#endif
+
 #endif /* __ASSEMBLY__ */
 
 #endif
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index f2b4e816b6de..5df003d6157c 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -48,8 +48,9 @@ arm64-obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL)   += 
acpi_parking_protocol.o
 arm64-obj-$(CONFIG_PARAVIRT)   += paravirt.o
 arm64-obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
 arm64-obj-$(CONFIG_HIBERNATION)+= hibernate.o hibernate-asm.o
-arm64-obj-$(CONFIG_KEXEC)  += machine_kexec.o relocate_kernel.o
\
+arm64-obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o
\
   cpu-reset.o
+arm64-obj-$(CONFIG_KEXEC_FILE) += machine_kexec_file.o
 arm64-obj-$(CONFIG_ARM64_RELOC_TEST)   += arm64-reloc-test.o
 arm64-reloc-test-y := reloc_test_core.o reloc_test_syms.o
 arm64-obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
diff --git a/arch/arm64/kernel/machine_kexec_file.c 
b/arch/arm64/kernel/machine_kexec_file.c
new file mode 100644
index ..81af1602e149
--- /dev/null
+++ b/arch/arm64/kernel/machine_kexec_file.c
@@ -0,0 +1,213 @@
+/*
+ * kexec_file for arm64
+ *
+ * Copyright (C) 2017 Linaro Limited
+ * Author: AKASHI Takahiro 
+ *
+ * Most code is derived from arm64 port of kexec-tools
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#define pr_fmt(fmt) "kexec_file: " fmt
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+static int __dt_root_addr_cells;
+static int __dt_root_size_cells;
+
+const struct kexec_file_ops * const kexec_file_loaders[] = {
+   NULL
+};
+
+int arch_kimage_file_post_load_cleanup(struct kimage *image)
+{
+   vfree(image->arch.dtb_buf);
+   image->arch.dtb_buf = NULL;
+
+   return _kimage_file_post_load_cleanup(image);
+}
+
+int arch_kexec_walk_mem(struct kexec_buf *kbuf, int (*func)(u64, u64, void *))
+{
+   if (kbuf->image->type == KEXEC_TYPE_CRASH)
+   return walk_iomem_res_desc(crashk_res.desc,
+   IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY,
+   crashk_res.start, crashk_res.end,
+   kbuf, func);
+   else if (kbuf->top_down)
+   return walk_system_ram_res_rev(0, ULONG_MAX, kbuf, func);
+   else
+   return walk_system_ram_res(0, ULONG_MAX, kbuf, func);
+}
+
+int setup_dtb(struct kimage *image,
+   unsigned long initrd_load_addr, unsigned long initrd_len,
+   char *cmdline, unsigned long cmdline_len,
+   char **dtb_buf, size_t *dtb_buf_len)
+{
+   char *buf = NULL;
+   size_t buf_size;
+   int nodeoffset;
+   u64 value;
+   int range_len;
+   int ret;
+
+   /* duplicate dt blob */
+   buf_size = fdt_totalsize(initial_boot_params);
+   range_len = (__dt_root_addr_cells + __dt_root_size_cells) * sizeof(u32);
+
+   if (initrd_load_addr)
+   buf_size += fdt_prop_len("initrd-start", sizeof(u64))
+   + fdt_pr

[PATCH v6 01/10] include: pe.h: remove message[] from mz header definition

2017-10-23 Thread AKASHI Takahiro
message[] field won't be part of the definition of mz header.

This change is crucial for enabling kexec_file_load on arm64 because
arm64's "Image" binary, as in PE format, doesn't have any data for it and
accordingly the following check in pefile_parse_binary() will fail:

chkaddr(cursor, mz->peaddr, sizeof(*pe));

Signed-off-by: AKASHI Takahiro 
Reviewed-by: Ard Biesheuvel 
Cc: David Howells 
Cc: Vivek Goyal 
Cc: Herbert Xu 
Cc: David S. Miller 
---
 include/linux/pe.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/pe.h b/include/linux/pe.h
index 143ce75be5f0..3482b18a48b5 100644
--- a/include/linux/pe.h
+++ b/include/linux/pe.h
@@ -166,7 +166,7 @@ struct mz_hdr {
uint16_t oem_info;  /* oem specific */
uint16_t reserved1[10]; /* reserved */
uint32_t peaddr;/* address of pe header */
-   char message[64];   /* message to print */
+   char message[]; /* message to print */
 };
 
 struct mz_reloc {
-- 
2.14.1


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v6 08/10] arm64: kexec_file: set up for crash dump adding elf core header

2017-10-23 Thread AKASHI Takahiro
load_crashdump_segments() creates and loads a memory segment of elf core
header for crash dump.

"linux,usable-memory-range" and "linux,elfcorehdr" will add to the 2nd
kernel's device-tree blob. The logic of this code is also from kexec-tools.

Signed-off-by: AKASHI Takahiro 
Cc: Catalin Marinas 
Cc: Will Deacon 
---
 arch/arm64/include/asm/kexec.h |   5 ++
 arch/arm64/kernel/machine_kexec_file.c | 151 +
 kernel/kexec_file.c|   2 +-
 3 files changed, 157 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h
index 2fadd3cbf3af..edb702e64a8a 100644
--- a/arch/arm64/include/asm/kexec.h
+++ b/arch/arm64/include/asm/kexec.h
@@ -98,6 +98,10 @@ static inline void crash_post_resume(void) {}
 
 struct kimage_arch {
void *dtb_buf;
+   /* Core ELF header buffer */
+   void *elf_headers;
+   unsigned long elf_headers_sz;
+   unsigned long elf_load_addr;
 };
 
 struct kimage;
@@ -113,6 +117,7 @@ extern int load_other_segments(struct kimage *image,
unsigned long kernel_load_addr,
char *initrd, unsigned long initrd_len,
char *cmdline, unsigned long cmdline_len);
+extern int load_crashdump_segments(struct kimage *image);
 #endif
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/arm64/kernel/machine_kexec_file.c 
b/arch/arm64/kernel/machine_kexec_file.c
index 81af1602e149..cdedd4b05cac 100644
--- a/arch/arm64/kernel/machine_kexec_file.c
+++ b/arch/arm64/kernel/machine_kexec_file.c
@@ -19,6 +19,7 @@
 #include 
 #include 
 #include 
+#include 
 
 static int __dt_root_addr_cells;
 static int __dt_root_size_cells;
@@ -32,6 +33,10 @@ int arch_kimage_file_post_load_cleanup(struct kimage *image)
vfree(image->arch.dtb_buf);
image->arch.dtb_buf = NULL;
 
+   vfree(image->arch.elf_headers);
+   image->arch.elf_headers = NULL;
+   image->arch.elf_headers_sz = 0;
+
return _kimage_file_post_load_cleanup(image);
 }
 
@@ -48,6 +53,78 @@ int arch_kexec_walk_mem(struct kexec_buf *kbuf, int 
(*func)(u64, u64, void *))
return walk_system_ram_res(0, ULONG_MAX, kbuf, func);
 }
 
+static int __init arch_kexec_file_init(void)
+{
+   /* Those values are used later on loading the kernel */
+   __dt_root_addr_cells = dt_root_addr_cells;
+   __dt_root_size_cells = dt_root_size_cells;
+
+   return 0;
+}
+late_initcall(arch_kexec_file_init);
+
+#define FDT_ALIGN(x, a)(((x) + (a) - 1) & ~((a) - 1))
+#define FDT_TAGALIGN(x)(FDT_ALIGN((x), FDT_TAGSIZE))
+
+static int fdt_prop_len(const char *prop_name, int len)
+{
+   return (strlen(prop_name) + 1) +
+   sizeof(struct fdt_property) +
+   FDT_TAGALIGN(len);
+}
+
+static bool cells_size_fitted(unsigned long base, unsigned long size)
+{
+   /* if *_cells >= 2, cells can hold 64-bit values anyway */
+   if ((__dt_root_addr_cells == 1) && (base >= (1ULL << 32)))
+   return false;
+
+   if ((__dt_root_size_cells == 1) && (size >= (1ULL << 32)))
+   return false;
+
+   return true;
+}
+
+static void fill_property(void *buf, u64 val64, int cells)
+{
+   u32 val32;
+
+   if (cells == 1) {
+   val32 = cpu_to_fdt32((u32)val64);
+   memcpy(buf, &val32, sizeof(val32));
+   } else {
+   memset(buf, 0, cells * sizeof(u32) - sizeof(u64));
+   buf += cells * sizeof(u32) - sizeof(u64);
+
+   val64 = cpu_to_fdt64(val64);
+   memcpy(buf, &val64, sizeof(val64));
+   }
+}
+
+static int fdt_setprop_range(void *fdt, int nodeoffset, const char *name,
+   unsigned long addr, unsigned long size)
+{
+   void *buf, *prop;
+   size_t buf_size;
+   int result;
+
+   buf_size = (__dt_root_addr_cells + __dt_root_size_cells) * sizeof(u32);
+   prop = buf = vmalloc(buf_size);
+   if (!buf)
+   return -ENOMEM;
+
+   fill_property(prop, addr, __dt_root_addr_cells);
+   prop += __dt_root_addr_cells * sizeof(u32);
+
+   fill_property(prop, size, __dt_root_size_cells);
+
+   result = fdt_setprop(fdt, nodeoffset, name, buf, buf_size);
+
+   vfree(buf);
+
+   return result;
+}
+
 int setup_dtb(struct kimage *image,
unsigned long initrd_load_addr, unsigned long initrd_len,
char *cmdline, unsigned long cmdline_len,
@@ -60,10 +137,26 @@ int setup_dtb(struct kimage *image,
int range_len;
int ret;
 
+   /* check ranges against root's #address-cells and #size-cells */
+   if (image->type == KEXEC_TYPE_CRASH &&
+   (!cells_size_fitted(image->arch.elf_load_addr,
+   image->arch.elf_headers_sz) ||
+!cells_size_fitted(crashk_res.start,
+   crashk_res.end - crashk_res.start + 1))) {
+   pr_err("Crash memory 

[PATCH v6 05/10] asm-generic: add kexec_file_load system call to unistd.h

2017-10-23 Thread AKASHI Takahiro
The initial user of this system call number is arm64.

Signed-off-by: AKASHI Takahiro 
Acked-by: Arnd Bergmann 
---
 include/uapi/asm-generic/unistd.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/include/uapi/asm-generic/unistd.h 
b/include/uapi/asm-generic/unistd.h
index 061185a5eb51..086697fe3917 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -731,9 +731,11 @@ __SYSCALL(__NR_pkey_alloc,sys_pkey_alloc)
 __SYSCALL(__NR_pkey_free, sys_pkey_free)
 #define __NR_statx 291
 __SYSCALL(__NR_statx, sys_statx)
+#define __NR_kexec_file_load 292
+__SYSCALL(__NR_kexec_file_load, sys_kexec_file_load)
 
 #undef __NR_syscalls
-#define __NR_syscalls 292
+#define __NR_syscalls 293
 
 /*
  * All syscalls below here should go away really,
-- 
2.14.1


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v6 06/10] arm64: kexec_file: create purgatory

2017-10-23 Thread AKASHI Takahiro
This is a basic purgatory, or a kind of glue code between the two kernels,
for arm64.

Since purgatory is assumed to be a relocatable (not executable) object by
the generic kexec code, arch_kexec_apply_relocations_add() is required in
general. Arm64's purgatory, however, is simple asm in which all the references
can be resolved locally, so no re-linking is needed here.

Please note that even if we don't support digest check at purgatory we
need purgatory_sha_regions and purgatory_sha256_digest as they are
referenced by generic kexec code.

Signed-off-by: AKASHI Takahiro 
Cc: Catalin Marinas 
Cc: Will Deacon 
---
 arch/arm64/Makefile   |  1 +
 arch/arm64/purgatory/Makefile | 24 +++
 arch/arm64/purgatory/entry.S  | 55 +++
 3 files changed, 80 insertions(+)
 create mode 100644 arch/arm64/purgatory/Makefile
 create mode 100644 arch/arm64/purgatory/entry.S

diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 939b310913cf..cf39ec3baf5a 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -110,6 +110,7 @@ core-$(CONFIG_XEN) += arch/arm64/xen/
 core-$(CONFIG_CRYPTO) += arch/arm64/crypto/
 libs-y := arch/arm64/lib/ $(libs-y)
 core-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a
+core-$(CONFIG_KEXEC_FILE) += arch/arm64/purgatory/
 
 # Default target when executing plain make
 boot   := arch/arm64/boot
diff --git a/arch/arm64/purgatory/Makefile b/arch/arm64/purgatory/Makefile
new file mode 100644
index ..c2127a2cbd51
--- /dev/null
+++ b/arch/arm64/purgatory/Makefile
@@ -0,0 +1,24 @@
+OBJECT_FILES_NON_STANDARD := y
+
+purgatory-y := entry.o
+
+targets += $(purgatory-y)
+PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y))
+
+LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined \
+   -nostdlib -z nodefaultlib
+targets += purgatory.ro
+
+$(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE
+   $(call if_changed,ld)
+
+targets += kexec_purgatory.c
+
+CMD_BIN2C = $(objtree)/scripts/basic/bin2c
+quiet_cmd_bin2c = BIN2C $@
+   cmd_bin2c = $(CMD_BIN2C) kexec_purgatory < $< > $@
+
+$(obj)/kexec_purgatory.c: $(obj)/purgatory.ro FORCE
+   $(call if_changed,bin2c)
+
+obj-${CONFIG_KEXEC_FILE}   += kexec_purgatory.o
diff --git a/arch/arm64/purgatory/entry.S b/arch/arm64/purgatory/entry.S
new file mode 100644
index ..fe6e968076db
--- /dev/null
+++ b/arch/arm64/purgatory/entry.S
@@ -0,0 +1,55 @@
+/*
+ * kexec core purgatory
+ */
+#include 
+#include 
+
+#define SHA256_DIGEST_SIZE 32 /* defined in crypto/sha.h */
+
+.text
+
+ENTRY(purgatory_start)
+   /* Start new image. */
+   ldr x17, __kernel_entry
+   ldr x0, __dtb_addr
+   mov x1, xzr
+   mov x2, xzr
+   mov x3, xzr
+   br  x17
+END(purgatory_start)
+
+/*
+ * data section:
+ * kernel_entry and dtb_addr are global but also labelled as local,
+ * "__xxx:", to avoid unwanted re-linking.
+ *
+ * purgatory_sha_regions and purgatory_sha256_digest are referenced
+ * by kexec generic code and so must exist, but not actually used
+ * here because hash check is not that useful in purgatory.
+ */
+.align 3
+
+.globl kernel_entry
+kernel_entry:
+__kernel_entry:
+   .quad   0
+END(kernel_entry)
+
+.globl dtb_addr
+dtb_addr:
+__dtb_addr:
+   .quad   0
+END(dtb_addr)
+
+.globl purgatory_sha_regions
+purgatory_sha_regions:
+   .rept   KEXEC_SEGMENT_MAX
+   .quad   0
+   .quad   0
+   .endr
+END(purgatory_sha_regions)
+
+.globl purgatory_sha256_digest
+purgatory_sha256_digest:
+.skip   SHA256_DIGEST_SIZE
+END(purgatory_sha256_digest)
-- 
2.14.1


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v6 04/10] kexec_file: factor out crashdump elf header function from x86

2017-10-23 Thread AKASHI Takahiro
prepare_elf_headers() can also be useful for other architectures,
including arm64. So let it be factored out.

Signed-off-by: AKASHI Takahiro 
Cc: Dave Young 
Cc: Vivek Goyal 
Cc: Baoquan He 
---
 arch/x86/kernel/crash.c | 324 
 include/linux/kexec.h   |  17 +++
 kernel/kexec_file.c | 308 +
 kernel/kexec_internal.h |  20 +++
 4 files changed, 345 insertions(+), 324 deletions(-)

diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 44404e2307bb..3c6b880f6dbf 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -21,7 +21,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 
 #include 
@@ -41,34 +40,6 @@
 /* Alignment required for elf header segment */
 #define ELF_CORE_HEADER_ALIGN   4096
 
-/* This primarily represents number of split ranges due to exclusion */
-#define CRASH_MAX_RANGES   16
-
-struct crash_mem_range {
-   u64 start, end;
-};
-
-struct crash_mem {
-   unsigned int nr_ranges;
-   struct crash_mem_range ranges[CRASH_MAX_RANGES];
-};
-
-/* Misc data about ram ranges needed to prepare elf headers */
-struct crash_elf_data {
-   struct kimage *image;
-   /*
-* Total number of ram ranges we have after various adjustments for
-* crash reserved region, etc.
-*/
-   unsigned int max_nr_ranges;
-
-   /* Pointer to elf header */
-   void *ehdr;
-   /* Pointer to next phdr */
-   void *bufp;
-   struct crash_mem mem;
-};
-
 /* Used while preparing memory map entries for second kernel */
 struct crash_memmap_data {
struct boot_params *params;
@@ -209,301 +180,6 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
 }
 
 #ifdef CONFIG_KEXEC_FILE
-static int get_nr_ram_ranges_callback(u64 start, u64 end, void *arg)
-{
-   unsigned int *nr_ranges = arg;
-
-   (*nr_ranges)++;
-   return 0;
-}
-
-
-/* Gather all the required information to prepare elf headers for ram regions 
*/
-static void fill_up_crash_elf_data(struct crash_elf_data *ced,
-  struct kimage *image)
-{
-   unsigned int nr_ranges = 0;
-
-   ced->image = image;
-
-   walk_system_ram_res(0, -1, &nr_ranges,
-   get_nr_ram_ranges_callback);
-
-   ced->max_nr_ranges = nr_ranges;
-
-   /* Exclusion of crash region could split memory ranges */
-   ced->max_nr_ranges++;
-
-   /* If crashk_low_res is not 0, another range split possible */
-   if (crashk_low_res.end)
-   ced->max_nr_ranges++;
-}
-
-static int exclude_mem_range(struct crash_mem *mem,
-   unsigned long long mstart, unsigned long long mend)
-{
-   int i, j;
-   unsigned long long start, end;
-   struct crash_mem_range temp_range = {0, 0};
-
-   for (i = 0; i < mem->nr_ranges; i++) {
-   start = mem->ranges[i].start;
-   end = mem->ranges[i].end;
-
-   if (mstart > end || mend < start)
-   continue;
-
-   /* Truncate any area outside of range */
-   if (mstart < start)
-   mstart = start;
-   if (mend > end)
-   mend = end;
-
-   /* Found completely overlapping range */
-   if (mstart == start && mend == end) {
-   mem->ranges[i].start = 0;
-   mem->ranges[i].end = 0;
-   if (i < mem->nr_ranges - 1) {
-   /* Shift rest of the ranges to left */
-   for (j = i; j < mem->nr_ranges - 1; j++) {
-   mem->ranges[j].start =
-   mem->ranges[j+1].start;
-   mem->ranges[j].end =
-   mem->ranges[j+1].end;
-   }
-   }
-   mem->nr_ranges--;
-   return 0;
-   }
-
-   if (mstart > start && mend < end) {
-   /* Split original range */
-   mem->ranges[i].end = mstart - 1;
-   temp_range.start = mend + 1;
-   temp_range.end = end;
-   } else if (mstart != start)
-   mem->ranges[i].end = mstart - 1;
-   else
-   mem->ranges[i].start = mend + 1;
-   break;
-   }
-
-   /* If a split happend, add the split to array */
-   if (!temp_range.end)
-   return 0;
-
-   /* Split happened */
-   if (i == CRASH_MAX_RANGES - 1) {
-   pr_err("Too many crash ranges after split\n");
-   return -ENOMEM;
-   }
-
-   /* Location where new range should go */
-   j = i + 1;
-   if (j < mem->nr_ranges) {
-

[PATCH v6 00/10] arm64: kexec: add kexec_file_load() support

2017-10-23 Thread AKASHI Takahiro
This is the sixth round of implementing kexec_file_load() support
on arm64.[1]
Most of the code is based on kexec-tools (along with some kernel code
from x86, which also came from kexec-tools).


This patch series enables us to
  * load the kernel, Image, with kexec_file_load system call, and
  * optionally verify its signature at load time for trusted boot.

To load the kernel via kexec_file_load system call, a small change
is also needed to kexec-tools. See [2]. This enables '-s' option.
(Please use v7.2.1+ crash for v4.14+ kernel)

As we discussed a long time ago, users may not be allowed to specify
device-tree file of the 2nd kernel explicitly with kexec-tools, hence
re-using the blob of the first kernel.

Regarding a signing method, we conform with x86 (or rather Microsoft?)
style of signing since the binary can also be seen as in PE format
(assuming that CONFIG_EFI is enabled).

Powerpc is also going to support extended-file-attribute-based
verification[3] with vmlinux, but arm64 doesn't for now partly
because we don't have TPM-based IMA at this moment.

Accordingly, we can use the existing command, sbsign, to sign the kernel.

$ sbsign --key ${KEY} --cert ${CERT} Image

Please note that it is totally up to the system what key/certificate is
used for signing, but one of easy ways to *try* this feature is to turn on
CONFIG_MODULE_SIG so that we can reuse certs/signing_key.pem as a signing
key, KEY and CERT above, for kernel.
(This also enables CONFIG_CRYPTO_SHA1 by default.)


Some concerns(or future works):
* Even if the kernel is configured with CONFIG_RANDOMIZE_BASE, the 2nd
  kernel won't be placed at a randomized address. We will have to
  add some boot code similar to efi-stub to implement the feature.
* While big-endian kernel can support kernel signing, I'm not sure that
  Image can be recognized as in PE format because x86 standard only
  defines little-endian-based format.
* IMA(and extended file attribute)-based kexec
* vmlinux support

  [1] http://git.linaro.org/people/takahiro.akashi/linux-aarch64.git
branch:arm64/kexec_file
  [2] http://git.linaro.org/people/takahiro.akashi/kexec-tools.git
branch:arm64/kexec_file
  [3] http://lkml.iu.edu//hypermail/linux/kernel/1707.0/03669.html


Changes in v6 (Oct 24, 2017)
* fix a for-loop bug in _kexec_kernel_image_probe() per Julien

Changes in v5 (Oct 10, 2017)
* fix kbuild errors around patch #3
per Julien's comments,
* fix a bug in walk_system_ram_res_rev() with some cleanup
* modify fdt_setprop_range() to use vmalloc()
* modify fill_property() to use memset()

Changes in v4 (Oct 2, 2017)
* reinstate x86's arch_kexec_kernel_image_load()
* rename weak arch_kexec_kernel_xxx() to _kexec_kernel_xxx() for
  better re-use
* constify kexec_file_loaders[]

Changes in v3 (Sep 15, 2017)
* fix kbuild test error
* factor out arch_kexec_kernel_*() & arch_kimage_file_post_load_cleanup()
* remove CONFIG_CRASH_CORE guard from kexec_file.c
* add vmapped kernel region to vmcore for gdb backtracing
  (see prepare_elf64_headers())
* merge asm/kexec_file.h into asm/kexec.h
* and some cleanups

Changes in v2 (Sep 8, 2017)
* move core-header-related functions from crash_core.c to kexec_file.c
* drop hash-check code from purgatory
* modify purgatory asm to remove arch_kexec_apply_relocations_add()
* drop older kernel support
* drop vmlinux support (at least, for this series)

Patch #1 to #5 are all preparatory patches on generic side.
Patch #6 is purgatory code.
Patch #7 to #9 are common for enabling kexec_file_load.
Patch #10 is for 'Image' support.

AKASHI Takahiro (10):
  include: pe.h: remove message[] from mz header definition
  resource: add walk_system_ram_res_rev()
  kexec_file: factor out arch_kexec_kernel_*() from x86, powerpc
  kexec_file: factor out crashdump elf header function from x86
  asm-generic: add kexec_file_load system call to unistd.h
  arm64: kexec_file: create purgatory
  arm64: kexec_file: load initrd, device-tree and purgatory segments
  arm64: kexec_file: set up for crash dump adding elf core header
  arm64: enable KEXEC_FILE config
  arm64: kexec_file: add Image format support

 arch/arm64/Kconfig  |  29 +++
 arch/arm64/Makefile |   1 +
 arch/arm64/include/asm/kexec.h  |  93 +++
 arch/arm64/kernel/Makefile  |   4 +-
 arch/arm64/kernel/kexec_image.c | 105 
 arch/arm64/kernel/machine_kexec_file.c  | 367 
 arch/arm64/purgatory/Makefile   |  24 ++
 arch/arm64/purgatory/entry.S|  55 +
 arch/powerpc/include/asm/kexec.h|   2 +-
 arch/powerpc/kernel/kexec_elf_64.c  |   2 +-
 arch/powerpc/kernel/machine_kexec_file_64.c |  39 +--
 arch/x86/include/asm/kexec-bzimage64.h  |   2 +-
 arch/x86/kernel/crash.c | 324 
 arch/x86/kernel/kexec-bzimage64.c   |   2 +-
 arch/x86/kernel/machine_kex

Re: [PATCH 3/3] kdump: round up the total memory size to 128M for crashkernel reservation

2017-10-23 Thread Dave Young
Hi Baoquan,

On 10/24/17 at 01:57pm, Baoquan He wrote:
> Hi Dave,
> 
> On 10/24/17 at 01:31pm, Dave Young wrote:
> > The total memory size we get in kernel is usually slightly less than 2G 
> > with a
> > 2G memory module machine. The main reason is bios/firmware reserve some area
> > it will not export all memory as usable to Linux.
> > 
> > 2G memory X86 kvm guest test result of the total_mem value:
> > UEFI boot with ovmf: 0x7ef1
> > Legacy boot kvm guest: 0x7ff7cc00
> > 
> > An option is to use dmi/smbios to get physical memory size, but it's not
> > reliable as well. According to Prarit hardware vendors sometimes screw this 
> > up.
> > Thus we choose to round up total size to 128M to workaround this problem.
> > This is a best effort workaround, will improve it when we have better way
> > in the future. 
> 
> Thanks for this posting. While I don't get the point of this patch. So
> firmware take piece of memory, then why we need to count it into the
> total memory which we want to calculate a crashkernel memory based on.
> 
> Not counting that, is there anything incorrect?

Yes, considering crashkernel=1G-2G:128M,  if we have a 1G memory
machine, we get total size 1023M from firmware then it will not fall
into 1G-2G thus no memory reserved.  User will never know that, it is
hard to let user to know the exact total value we get in kernel..

> 
> Thanks
> Baoquan
> 
> > 
> > Signed-off-by: Dave Young 
> > ---
> >  kernel/crash_core.c |   17 +
> >  1 file changed, 13 insertions(+), 4 deletions(-)
> > 
> > --- linux.orig/kernel/crash_core.c
> > +++ linux/kernel/crash_core.c
> > @@ -42,6 +42,15 @@ static int __init parse_crashkernel_mem(
> >  {
> > char *cur = cmdline, *tmp;
> > bool infinite_end = false;
> > +   unsigned long long total_mem = system_ram;
> > +
> > +   /*
> > +* Firmware usually reserves some memory regions for it's own use.
> > +* so we get less than actual system memory size.
> > +* We workaround this by round up the total size to 128M which is
> > +* enough for most test cases.
> > +*/
> > +   total_mem = roundup(total_mem, 0x800);
> >  
> > /* for each entry of the comma-separated list */
> > do {
> > @@ -86,13 +95,13 @@ static int __init parse_crashkernel_mem(
> > return -EINVAL;
> > }
> > cur = tmp;
> > -   if (size >= system_ram) {
> > +   if (size >= total_mem) {
> > pr_warn("crashkernel: invalid size\n");
> > return -EINVAL;
> > }
> >  
> > /* match ? */
> > -   if (system_ram >= start && system_ram < end) {
> > +   if (total_mem >= start && total_mem < end) {
> > *crash_size = size;
> > if (end == ULLONG_MAX)
> > infinite_end = true;
> > @@ -126,9 +135,9 @@ static int __init parse_crashkernel_mem(
> > pr_warn("Memory reservation scale order 
> > expected after '^'\n");
> > return -EINVAL;
> > }
> > -   size = (system_ram - *crash_size) >> shift;
> > +   size = (total_mem - *crash_size) >> shift;
> > size = *crash_size + roundup(size, 1ULL << 20);
> > -   if (size < system_ram)
> > +   if (size < total_mem)
> > *crash_size = size;
> > cur = tmp;
> > } else
> > 
> > 

Thanks
Dave

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH 1/3] kdump: extend crashkernel=range:size to dynamically increase reservation size

2017-10-23 Thread Dave Young
Hi Baoquan,

On 10/24/17 at 02:00pm, Baoquan He wrote:
> On 10/24/17 at 01:31pm, Dave Young wrote:
> > crashkernel=range:size syntax allows to reserve specified size for system
> > with total memory fall into the specified range. For example:
> > crashkernel=2G-3G:128M,3G-:256M reserves 128M for system with memory >=2G
> > and memory <3G, and reserves 256M for system with memory >= 3G
> > 
> > In the above case 256M as a fixed value which can not fulfill very huge 
> > systems with large memory and IO devices. As memory size increases usually
> > minimum memory requirement for booting will also increase. It is nearly
> > impossible for a kernel to run with a fixed limited memory size for all
> > kinds of systems.
> > 
> > Thus extend the crashkernel=range:size to let user specify the scaling
> > ratio in kernel cmdline like below:
> > crashkernel=range:size^order, for example:
> > crashkernel=2G-:128M^14 reserve 128M + (total_memory - 128M) >> 14
> > for machines with over 2G memory.
> 
> Well, ah.., this look a little ugly and tricky. Leave this to other
> reviewers to comment.

But this is better than embed policy in kernel.. RHEL takes a
crashkernel=auto which do the scaling. Leaving user to determine
the scaling ratio is the better way. 

OTOH, this is an improvement to current crashkernel=range:size
which can not scale the size before.

> 
> > 
> > Also s/start-[end]/[start]-end/ in kernel-parameters.txt according to code
> > 
> > Signed-off-by: Dave Young 
> > ---
> >  Documentation/admin-guide/kernel-parameters.txt |   10 --
> >  Documentation/kdump/kdump.txt   |7 ++--
> >  kernel/crash_core.c |   35 
> > +---
> >  3 files changed, 41 insertions(+), 11 deletions(-)
> > 
> > --- linux-x86.orig/kernel/crash_core.c
> > +++ linux-x86/kernel/crash_core.c
> > @@ -31,7 +31,7 @@ static unsigned char *vmcoreinfo_data_sa
> >  /*
> >   * This function parses command lines in the format
> >   *
> > - *   crashkernel=ramsize-range:size[,...][@offset]
> > + *   crashkernel=ramsize-range:size[,...][@offset][^order]
> >   *
> >   * The function returns 0 on success and -EINVAL on failure.
> >   */
> > @@ -41,6 +41,7 @@ static int __init parse_crashkernel_mem(
> > unsigned long long *crash_base)
> >  {
> > char *cur = cmdline, *tmp;
> > +   bool infinite_end = false;
> >  
> > /* for each entry of the comma-separated list */
> > do {
> > @@ -93,13 +94,21 @@ static int __init parse_crashkernel_mem(
> > /* match ? */
> > if (system_ram >= start && system_ram < end) {
> > *crash_size = size;
> > +   if (end == ULLONG_MAX)
> > +   infinite_end = true;
> > break;
> > }
> > } while (*cur++ == ',');
> >  
> > -   if (*crash_size > 0) {
> > -   while (*cur && *cur != ' ' && *cur != '@')
> > +   if (*crash_size <= 0)
> > +   goto out;
> > +
> > +   while (*cur && *cur != ' ') {
> > +   if (*cur != '@' && *cur != '^') {
> > cur++;
> > +   continue;
> > +   }
> > +
> > if (*cur == '@') {
> > cur++;
> > *crash_base = memparse(cur, &tmp);
> > @@ -107,10 +116,28 @@ static int __init parse_crashkernel_mem(
> > pr_warn("Memory value expected after '@'\n");
> > return -EINVAL;
> > }
> > -   }
> > +   cur = tmp;
> > +   } else if (*cur == '^' && infinite_end ) {
> > +   unsigned long long shift, size;
> > +
> > +   cur++;
> > +   shift = memparse(cur, &tmp);
> > +   if (cur == tmp) {
> > +   pr_warn("Memory reservation scale order 
> > expected after '^'\n");
> > +   return -EINVAL;
> > +   }
> > +   size = (system_ram - *crash_size) >> shift;
> > +   size = *crash_size + roundup(size, 1ULL << 20);
> > +   if (size < system_ram)
> > +   *crash_size = size;
> > +   cur = tmp;
> > +   } else
> > +   cur++;
> > }
> >  
> > return 0;
> > +out:
> > +   return -EINVAL;
> >  }
> >  
> >  /*
> > --- linux-x86.orig/Documentation/admin-guide/kernel-parameters.txt
> > +++ linux-x86/Documentation/admin-guide/kernel-parameters.txt
> > @@ -680,12 +680,14 @@
> > is selected automatically. Check
> > Documentation/kdump/kdump.txt for further details.
> >  
> > -   crashkernel=range1:size1[,range2:size2,...][@offset]
> > +   crashkernel=range1:size1[,range2:size2,...][@offset][^order]
> > [KNL] Same as above, but depends on the memory
> > in the running system. The synta

Re: [PATCH 1/3] kdump: extend crashkernel=range:size to dynamically increase reservation size

2017-10-23 Thread Baoquan He
On 10/24/17 at 01:31pm, Dave Young wrote:
> crashkernel=range:size syntax allows to reserve specified size for system
> with total memory fall into the specified range. For example:
> crashkernel=2G-3G:128M,3G-:256M reserves 128M for system with memory >=2G
> and memory <3G, and reserves 256M for system with memory >= 3G
> 
> In the above case 256M as a fixed value which can not fulfill very huge 
> systems with large memory and IO devices. As memory size increases usually
> minimum memory requirement for booting will also increase. It is nearly
> impossible for a kernel to run with a fixed limited memory size for all
> kinds of systems.
> 
> Thus extend the crashkernel=range:size to let user specify the scaling
> ratio in kernel cmdline like below:
> crashkernel=range:size^order, for example:
> crashkernel=2G-:128M^14 reserve 128M + (total_memory - 128M) >> 14
> for machines with over 2G memory.

Well, ah.., this look a little ugly and tricky. Leave this to other
reviewers to comment.

> 
> Also s/start-[end]/[start]-end/ in kernel-parameters.txt according to code
> 
> Signed-off-by: Dave Young 
> ---
>  Documentation/admin-guide/kernel-parameters.txt |   10 --
>  Documentation/kdump/kdump.txt   |7 ++--
>  kernel/crash_core.c |   35 
> +---
>  3 files changed, 41 insertions(+), 11 deletions(-)
> 
> --- linux-x86.orig/kernel/crash_core.c
> +++ linux-x86/kernel/crash_core.c
> @@ -31,7 +31,7 @@ static unsigned char *vmcoreinfo_data_sa
>  /*
>   * This function parses command lines in the format
>   *
> - *   crashkernel=ramsize-range:size[,...][@offset]
> + *   crashkernel=ramsize-range:size[,...][@offset][^order]
>   *
>   * The function returns 0 on success and -EINVAL on failure.
>   */
> @@ -41,6 +41,7 @@ static int __init parse_crashkernel_mem(
>   unsigned long long *crash_base)
>  {
>   char *cur = cmdline, *tmp;
> + bool infinite_end = false;
>  
>   /* for each entry of the comma-separated list */
>   do {
> @@ -93,13 +94,21 @@ static int __init parse_crashkernel_mem(
>   /* match ? */
>   if (system_ram >= start && system_ram < end) {
>   *crash_size = size;
> + if (end == ULLONG_MAX)
> + infinite_end = true;
>   break;
>   }
>   } while (*cur++ == ',');
>  
> - if (*crash_size > 0) {
> - while (*cur && *cur != ' ' && *cur != '@')
> + if (*crash_size <= 0)
> + goto out;
> +
> + while (*cur && *cur != ' ') {
> + if (*cur != '@' && *cur != '^') {
>   cur++;
> + continue;
> + }
> +
>   if (*cur == '@') {
>   cur++;
>   *crash_base = memparse(cur, &tmp);
> @@ -107,10 +116,28 @@ static int __init parse_crashkernel_mem(
>   pr_warn("Memory value expected after '@'\n");
>   return -EINVAL;
>   }
> - }
> + cur = tmp;
> + } else if (*cur == '^' && infinite_end ) {
> + unsigned long long shift, size;
> +
> + cur++;
> + shift = memparse(cur, &tmp);
> + if (cur == tmp) {
> + pr_warn("Memory reservation scale order 
> expected after '^'\n");
> + return -EINVAL;
> + }
> + size = (system_ram - *crash_size) >> shift;
> + size = *crash_size + roundup(size, 1ULL << 20);
> + if (size < system_ram)
> + *crash_size = size;
> + cur = tmp;
> + } else
> + cur++;
>   }
>  
>   return 0;
> +out:
> + return -EINVAL;
>  }
>  
>  /*
> --- linux-x86.orig/Documentation/admin-guide/kernel-parameters.txt
> +++ linux-x86/Documentation/admin-guide/kernel-parameters.txt
> @@ -680,12 +680,14 @@
>   is selected automatically. Check
>   Documentation/kdump/kdump.txt for further details.
>  
> - crashkernel=range1:size1[,range2:size2,...][@offset]
> + crashkernel=range1:size1[,range2:size2,...][@offset][^order]
>   [KNL] Same as above, but depends on the memory
>   in the running system. The syntax of range is
> - start-[end] where start and end are both
> - a memory unit (amount[KMG]). See also
> - Documentation/kdump/kdump.txt for an example.
> + [start]-end where start and end are both
> + a memory unit (amount[KMG]). In case the end of the
> + range is infinity '^order' can be used to scale
> +

Re: [PATCH 3/3] kdump: round up the total memory size to 128M for crashkernel reservation

2017-10-23 Thread Baoquan He
Hi Dave,

On 10/24/17 at 01:31pm, Dave Young wrote:
> The total memory size we get in kernel is usually slightly less than 2G with a
> 2G memory module machine. The main reason is bios/firmware reserve some area
> it will not export all memory as usable to Linux.
> 
> 2G memory X86 kvm guest test result of the total_mem value:
> UEFI boot with ovmf: 0x7ef1
> Legacy boot kvm guest: 0x7ff7cc00
> 
> An option is to use dmi/smbios to get physical memory size, but it's not
> reliable as well. According to Prarit hardware vendors sometimes screw this 
> up.
> Thus we choose to round up total size to 128M to workaround this problem.
> This is a best effort workaround, will improve it when we have better way
> in the future. 

Thanks for this posting. While I don't get the point of this patch. So
firmware take piece of memory, then why we need to count it into the
total memory which we want to calculate a crashkernel memory based on.

Not counting that, is there anything incorrect?

Thanks
Baoquan

> 
> Signed-off-by: Dave Young 
> ---
>  kernel/crash_core.c |   17 +
>  1 file changed, 13 insertions(+), 4 deletions(-)
> 
> --- linux.orig/kernel/crash_core.c
> +++ linux/kernel/crash_core.c
> @@ -42,6 +42,15 @@ static int __init parse_crashkernel_mem(
>  {
>   char *cur = cmdline, *tmp;
>   bool infinite_end = false;
> + unsigned long long total_mem = system_ram;
> +
> + /*
> +  * Firmware usually reserves some memory regions for it's own use.
> +  * so we get less than actual system memory size.
> +  * We workaround this by round up the total size to 128M which is
> +  * enough for most test cases.
> +  */
> + total_mem = roundup(total_mem, 0x800);
>  
>   /* for each entry of the comma-separated list */
>   do {
> @@ -86,13 +95,13 @@ static int __init parse_crashkernel_mem(
>   return -EINVAL;
>   }
>   cur = tmp;
> - if (size >= system_ram) {
> + if (size >= total_mem) {
>   pr_warn("crashkernel: invalid size\n");
>   return -EINVAL;
>   }
>  
>   /* match ? */
> - if (system_ram >= start && system_ram < end) {
> + if (total_mem >= start && total_mem < end) {
>   *crash_size = size;
>   if (end == ULLONG_MAX)
>   infinite_end = true;
> @@ -126,9 +135,9 @@ static int __init parse_crashkernel_mem(
>   pr_warn("Memory reservation scale order 
> expected after '^'\n");
>   return -EINVAL;
>   }
> - size = (system_ram - *crash_size) >> shift;
> + size = (total_mem - *crash_size) >> shift;
>   size = *crash_size + roundup(size, 1ULL << 20);
> - if (size < system_ram)
> + if (size < total_mem)
>   *crash_size = size;
>   cur = tmp;
>   } else
> 
> 

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH 2/3] X86/kdump: crashkernel=X try to reserve below 896M first then below 4G and MAXMEM

2017-10-23 Thread dyoung
Now crashkernel=X will fail if there's not enough memory at low region
(below 896M) when trying to reserve large memory size.  One can use
crashkernel=xM,high to reserve it at high region (>4G) but it is more
convenient to improve crashkernel=X to:

 - First try to reserve X below 896M (for being compatible with old
   kexec-tools).
 - If fails, try to reserve X below 4G (swiotlb need to stay below 4G).
 - If fails, try to reserve X from MAXMEM top down.

It's more transparent and user-friendly.

If crashkernel is large and the reserved is beyond 896M, old kexec-tools
is not compatible with new kernel because old kexec-tools can not load
kernel at high memory region, there was an old discussion below:
https://lkml.org/lkml/2013/10/15/601

But actually the behavior is consistent during my test. Suppose
old kernel fail to reserve memory at low areas, kdump does not
work because no memory reserved. With this patch, suppose new kernel
successfully reserved memory at high areas, old kexec-tools still fail
to load kdump kernel (tested 2.0.2), so it is acceptable, no need to
worry about the compatibility.

Here is the test result (kexec-tools 2.0.2, no high memory load
support):
Crashkernel over 4G:
# cat /proc/iomem|grep Crash
  be00-cdff : Crash kernel
  21300-21eff : Crash kernel
# ./kexec  -p /boot/vmlinuz-`uname -r`
Memory for crashkernel is not reserved
Please reserve memory by passing "crashkernel=X@Y" parameter to the kernel
Then try loading kdump kernel

crashkernel: 896M-4G:
# cat /proc/iomem|grep Crash
  9600-cdef : Crash kernel
# ./kexec -p /boot/vmlinuz-4.14.0-rc4+
ELF core (kcore) parse failed
Cannot load /boot/vmlinuz-4.14.0-rc4+

Signed-off-by: Dave Young 
---
 arch/x86/kernel/setup.c |   16 
 1 file changed, 16 insertions(+)

--- linux-x86.orig/arch/x86/kernel/setup.c
+++ linux-x86/arch/x86/kernel/setup.c
@@ -568,6 +568,22 @@ static void __init reserve_crashkernel(v
high ? CRASH_ADDR_HIGH_MAX
 : CRASH_ADDR_LOW_MAX,
crash_size, CRASH_ALIGN);
+#ifdef CONFIG_X86_64
+   /*
+* crashkernel=X reserve below 896M fails? Try below 4G
+*/
+   if (!high && !crash_base)
+   crash_base = memblock_find_in_range(CRASH_ALIGN,
+   (1ULL << 32),
+   crash_size, CRASH_ALIGN);
+   /*
+* crashkernel=X reserve below 4G fails? Try MAXMEM
+*/
+   if (!high && !crash_base)
+   crash_base = memblock_find_in_range(CRASH_ALIGN,
+   CRASH_ADDR_HIGH_MAX,
+   crash_size, CRASH_ALIGN);
+#endif
if (!crash_base) {
pr_info("crashkernel reservation failed - No suitable 
area found.\n");
return;



___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH 3/3] kdump: round up the total memory size to 128M for crashkernel reservation

2017-10-23 Thread dyoung
The total memory size we get in kernel is usually slightly less than 2G with a
2G memory module machine. The main reason is bios/firmware reserve some area
it will not export all memory as usable to Linux.

2G memory X86 kvm guest test result of the total_mem value:
UEFI boot with ovmf: 0x7ef1
Legacy boot kvm guest: 0x7ff7cc00

An option is to use dmi/smbios to get physical memory size, but it's not
reliable as well. According to Prarit hardware vendors sometimes screw this up.
Thus we choose to round up total size to 128M to workaround this problem.
This is a best effort workaround, will improve it when we have better way
in the future. 

Signed-off-by: Dave Young 
---
 kernel/crash_core.c |   17 +
 1 file changed, 13 insertions(+), 4 deletions(-)

--- linux.orig/kernel/crash_core.c
+++ linux/kernel/crash_core.c
@@ -42,6 +42,15 @@ static int __init parse_crashkernel_mem(
 {
char *cur = cmdline, *tmp;
bool infinite_end = false;
+   unsigned long long total_mem = system_ram;
+
+   /*
+* Firmware usually reserves some memory regions for it's own use.
+* so we get less than actual system memory size.
+* We workaround this by round up the total size to 128M which is
+* enough for most test cases.
+*/
+   total_mem = roundup(total_mem, 0x800);
 
/* for each entry of the comma-separated list */
do {
@@ -86,13 +95,13 @@ static int __init parse_crashkernel_mem(
return -EINVAL;
}
cur = tmp;
-   if (size >= system_ram) {
+   if (size >= total_mem) {
pr_warn("crashkernel: invalid size\n");
return -EINVAL;
}
 
/* match ? */
-   if (system_ram >= start && system_ram < end) {
+   if (total_mem >= start && total_mem < end) {
*crash_size = size;
if (end == ULLONG_MAX)
infinite_end = true;
@@ -126,9 +135,9 @@ static int __init parse_crashkernel_mem(
pr_warn("Memory reservation scale order 
expected after '^'\n");
return -EINVAL;
}
-   size = (system_ram - *crash_size) >> shift;
+   size = (total_mem - *crash_size) >> shift;
size = *crash_size + roundup(size, 1ULL << 20);
-   if (size < system_ram)
+   if (size < total_mem)
*crash_size = size;
cur = tmp;
} else



___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH 1/3] kdump: extend crashkernel=range:size to dynamically increase reservation size

2017-10-23 Thread dyoung
crashkernel=range:size syntax allows to reserve specified size for system
with total memory fall into the specified range. For example:
crashkernel=2G-3G:128M,3G-:256M reserves 128M for system with memory >=2G
and memory <3G, and reserves 256M for system with memory >= 3G

In the above case 256M as a fixed value which can not fulfill very huge 
systems with large memory and IO devices. As memory size increases usually
minimum memory requirement for booting will also increase. It is nearly
impossible for a kernel to run with a fixed limited memory size for all
kinds of systems.

Thus extend the crashkernel=range:size to let user specify the scaling
ratio in kernel cmdline like below:
crashkernel=range:size^order, for example:
crashkernel=2G-:128M^14 reserve 128M + (total_memory - 128M) >> 14
for machines with over 2G memory.

Also s/start-[end]/[start]-end/ in kernel-parameters.txt according to code

Signed-off-by: Dave Young 
---
 Documentation/admin-guide/kernel-parameters.txt |   10 --
 Documentation/kdump/kdump.txt   |7 ++--
 kernel/crash_core.c |   35 +---
 3 files changed, 41 insertions(+), 11 deletions(-)

--- linux-x86.orig/kernel/crash_core.c
+++ linux-x86/kernel/crash_core.c
@@ -31,7 +31,7 @@ static unsigned char *vmcoreinfo_data_sa
 /*
  * This function parses command lines in the format
  *
- *   crashkernel=ramsize-range:size[,...][@offset]
+ *   crashkernel=ramsize-range:size[,...][@offset][^order]
  *
  * The function returns 0 on success and -EINVAL on failure.
  */
@@ -41,6 +41,7 @@ static int __init parse_crashkernel_mem(
unsigned long long *crash_base)
 {
char *cur = cmdline, *tmp;
+   bool infinite_end = false;
 
/* for each entry of the comma-separated list */
do {
@@ -93,13 +94,21 @@ static int __init parse_crashkernel_mem(
/* match ? */
if (system_ram >= start && system_ram < end) {
*crash_size = size;
+   if (end == ULLONG_MAX)
+   infinite_end = true;
break;
}
} while (*cur++ == ',');
 
-   if (*crash_size > 0) {
-   while (*cur && *cur != ' ' && *cur != '@')
+   if (*crash_size <= 0)
+   goto out;
+
+   while (*cur && *cur != ' ') {
+   if (*cur != '@' && *cur != '^') {
cur++;
+   continue;
+   }
+
if (*cur == '@') {
cur++;
*crash_base = memparse(cur, &tmp);
@@ -107,10 +116,28 @@ static int __init parse_crashkernel_mem(
pr_warn("Memory value expected after '@'\n");
return -EINVAL;
}
-   }
+   cur = tmp;
+   } else if (*cur == '^' && infinite_end ) {
+   unsigned long long shift, size;
+
+   cur++;
+   shift = memparse(cur, &tmp);
+   if (cur == tmp) {
+   pr_warn("Memory reservation scale order 
expected after '^'\n");
+   return -EINVAL;
+   }
+   size = (system_ram - *crash_size) >> shift;
+   size = *crash_size + roundup(size, 1ULL << 20);
+   if (size < system_ram)
+   *crash_size = size;
+   cur = tmp;
+   } else
+   cur++;
}
 
return 0;
+out:
+   return -EINVAL;
 }
 
 /*
--- linux-x86.orig/Documentation/admin-guide/kernel-parameters.txt
+++ linux-x86/Documentation/admin-guide/kernel-parameters.txt
@@ -680,12 +680,14 @@
is selected automatically. Check
Documentation/kdump/kdump.txt for further details.
 
-   crashkernel=range1:size1[,range2:size2,...][@offset]
+   crashkernel=range1:size1[,range2:size2,...][@offset][^order]
[KNL] Same as above, but depends on the memory
in the running system. The syntax of range is
-   start-[end] where start and end are both
-   a memory unit (amount[KMG]). See also
-   Documentation/kdump/kdump.txt for an example.
+   [start]-end where start and end are both
+   a memory unit (amount[KMG]). In case the end of the
+   range is infinity '^order' can be used to scale
+   the size to size + (total_mem - start) >> 2^order
+   See also Documentation/kdump/kdump.txt for an example.
 
crashkernel=size[KMG],high
[KNL, x86_64] range could be above 4G. Allow kernel
--- li

[PATCH 0/3] kdump: crashkernel parameter improvement

2017-10-23 Thread dyoung
Hi,

Here is a try to improve current crashkernel kernel parameter

Patch 1/3 adds an extra functionality so that one can use like
crashkernel=2G-:128M^12 to reserve 128M for 2G+ machine but also scale the
size based on system memory, that means 128M + (total_mem - 128M) >> 12

Patch 2/3 is a resending of previous post to let crashkernel=xM to reserve
first from below 896M, then <4G, then MAXMEM.

http://lists.infradead.org/mailman/listinfo/kexec


Re: x86/kdump: crashkernel=X try to reserve below 896M first then below 4G and MAXMEM

2017-10-23 Thread Dave Young
On 10/20/17 at 01:52pm, Dave Young wrote:
> Now crashkernel=X will fail if there's not enough memory at low region
> (below 896M) when trying to reserve large memory size.  One can use
> crashkernel=xM,high to reserve it at high region (>4G) but it is more
> convenient to improve crashkernel=X to:
> 
>  - First try to reserve X below 896M (for being compatible with old
>kexec-tools).
>  - If fails, try to reserve X below 4G (swiotlb need to stay below 4G).
>  - If fails, try to reserve X from MAXMEM top down.
> 
> It's more transparent and user-friendly.
> 
> If crashkernel is large and the reserved is beyond 896M, old kexec-tools
> is not compatible with new kernel because old kexec-tools can not load
> kernel at high memory region, there was an old discussion below
> (previously posted by Chao Wang):
> https://lkml.org/lkml/2013/10/15/601
> 
> But actually the behavior is consistent during my test. Suppose
> old kernel fail to reserve memory at low areas, kdump does not
> work because no memory reserved. With this patch, suppose new kernel
> successfully reserved memory at high areas, old kexec-tools still fail
> to load kdump kernel (tested 2.0.2), so it is acceptable, no need to
> worry about the compatibility.
> 
> Here is the test result (kexec-tools 2.0.2, no high memory load
> support):
> Crashkernel over 4G:
> # cat /proc/iomem|grep Crash
>   be00-cdff : Crash kernel
>   21300-21eff : Crash kernel
> # ./kexec  -p /boot/vmlinuz-`uname -r`
> Memory for crashkernel is not reserved
> Please reserve memory by passing "crashkernel=X@Y" parameter to the kernel
> Then try loading kdump kernel
> 
> crashkernel: 896M-4G:
> # cat /proc/iomem|grep Crash
>   9600-cdef : Crash kernel
> # ./kexec -p /boot/vmlinuz-4.14.0-rc4+
> ELF core (kcore) parse failed
> Cannot load /boot/vmlinuz-4.14.0-rc4+
> 
> Signed-off-by: Dave Young 
> ---
>  arch/x86/kernel/setup.c |   16 
>  1 file changed, 16 insertions(+)
> 
> --- linux-x86.orig/arch/x86/kernel/setup.c
> +++ linux-x86/arch/x86/kernel/setup.c
> @@ -568,6 +568,22 @@ static void __init reserve_crashkernel(v
>   high ? CRASH_ADDR_HIGH_MAX
>: CRASH_ADDR_LOW_MAX,
>   crash_size, CRASH_ALIGN);
> +#ifdef CONFIG_X86_64
> + /*
> +  * crashkernel=X reserve below 896M fails? Try below 4G
> +  */
> + if (!high && !crash_base)
> + crash_base = memblock_find_in_range(CRASH_ALIGN,
> + (1ULL << 32),
> + crash_size, CRASH_ALIGN);
> + /*
> +  * crashkernel=X reserve below 4G fails? Try MAXMEM
> +  */
> + if (!high && !crash_base)
> + crash_base = memblock_find_in_range(CRASH_ALIGN,
> + CRASH_ADDR_HIGH_MAX,
> + crash_size, CRASH_ALIGN);
> +#endif
>   if (!crash_base) {
>   pr_info("crashkernel reservation failed - No suitable 
> area found.\n");
>   return;


Hmm, forgot to add [PATCH] prefix, will resend this along with other two
crashkernel patches.

Thanks
Dave

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH 2/2] ARM: read kernel size from zImage

2017-10-23 Thread Russell King
Read the new extension data which tells the boot agent about the
requirements for booting the kernel image, such as how much RAM
will be consumed by the kernel through decompression and booting.
This is necessary to control the placement of the DTB and
compressed RAM disk to avoid these objects being corrupted.

Tested-by: Tony Lindgren 
Signed-off-by: Russell King 
---
 kexec/arch/arm/kexec-zImage-arm.c | 51 ++-
 1 file changed, 50 insertions(+), 1 deletion(-)

diff --git a/kexec/arch/arm/kexec-zImage-arm.c 
b/kexec/arch/arm/kexec-zImage-arm.c
index c6ecb04..a8c40cb 100644
--- a/kexec/arch/arm/kexec-zImage-arm.c
+++ b/kexec/arch/arm/kexec-zImage-arm.c
@@ -34,6 +34,15 @@ struct zimage_header {
 #define ZIMAGE_MAGIC cpu_to_le32(0x016f2818)
uint32_t start;
uint32_t end;
+   uint32_t endian;
+
+   /* Extension to the data passed to the boot agent.  The offset
+* points at a tagged table following a similar format to the
+* ATAGs.
+*/
+   uint32_t magic2;
+#define ZIMAGE_MAGIC2 (0x45454545)
+   uint32_t extension_tag_offset;
 };
 
 struct android_image {
@@ -114,6 +123,17 @@ struct tag {
 #define byte_size(t)((t)->hdr.size << 2)
 #define tag_size(type)  ((sizeof(struct tag_header) + sizeof(struct type) + 3) 
>> 2)
 
+struct zimage_tag {
+   struct tag_header hdr;
+   union {
+#define ZIMAGE_TAG_KRNL_SIZE cpu_to_le32(0x5a534c4b)
+   struct zimage_krnl_size {
+   uint32_t size_ptr;
+   uint32_t bss_size;
+   } krnl_size;
+   } u;
+};
+
 int zImage_arm_probe(const char *UNUSED(buf), off_t UNUSED(len))
 {
/* 
@@ -434,7 +454,7 @@ int zImage_arm_load(int argc, char **argv, const char *buf, 
off_t len,
if (dtb_file)
dtb_buf = slurp_file(dtb_file, &dtb_length);
 
-   if (len > 0x34) {
+   if (len > sizeof(struct zimage_header)) {
const struct zimage_header *hdr;
off_t size;
 
@@ -460,6 +480,35 @@ int zImage_arm_load(int argc, char **argv, const char 
*buf, off_t len,
if (size < len)
len = size;
}
+
+   /* Do we have an extension table? */
+   if (hdr->magic2 == ZIMAGE_MAGIC2 && !kexec_arm_image_size) {
+   uint32_t offset = hdr->extension_tag_offset;
+   uint32_t max = len - sizeof(struct tag_header);
+   struct zimage_tag *tag;
+
+   dbgprintf("zImage has tags\n");
+
+   for (offset = hdr->extension_tag_offset;
+(tag = (void *)(buf + offset)) != NULL &&
+offset < max && byte_size(tag) &&
+   offset + byte_size(tag) < len;
+offset += byte_size(tag)) {
+   dbgprintf("  offset 0x%08x tag 0x%08x size 
%u\n",
+ offset, tag->hdr.tag, byte_size(tag));
+   if (tag->hdr.tag == ZIMAGE_TAG_KRNL_SIZE) {
+   uint32_t *p = (void *)buf +
+   tag->u.krnl_size.size_ptr;
+
+   kexec_arm_image_size =
+   get_unaligned(p) +
+   tag->u.krnl_size.bss_size;
+   }
+   }
+
+   dbgprintf("kernel image size: 0x%08x\n",
+ kexec_arm_image_size);
+   }
}
 
/* Handle android images, 2048 is the minimum page size */
-- 
2.7.4


-- 
RMK's Patch system: http://www.armlinux.org.uk/developer/patches/
FTTC broadband for 0.8mile line in suburbia: sync at 8.8Mbps down 630kbps up
According to speedtest.net: 8.21Mbps down 510kbps up

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH 1/2] ARM: cleanup initrd and dtb handling

2017-10-23 Thread Russell King
There is no difference in the way the initrd is handled between an
ATAG-based kernel and a DTB-based kernel.  Therefore, this should be
handled identically in both cases.

Rearrange the code to achieve this.

Signed-off-by: Russell King 
---
 kexec/arch/arm/kexec-zImage-arm.c | 116 +++---
 1 file changed, 57 insertions(+), 59 deletions(-)

diff --git a/kexec/arch/arm/kexec-zImage-arm.c 
b/kexec/arch/arm/kexec-zImage-arm.c
index 7f02b93..c6ecb04 100644
--- a/kexec/arch/arm/kexec-zImage-arm.c
+++ b/kexec/arch/arm/kexec-zImage-arm.c
@@ -24,7 +24,7 @@
 
 #define BOOT_PARAMS_SIZE 1536
 
-off_t initrd_base = 0, initrd_size = 0;
+off_t initrd_base, initrd_size;
 unsigned int kexec_arm_image_size = 0;
 unsigned long long user_page_offset = (-1ULL);
 
@@ -200,14 +200,12 @@ int create_mem32_tag(struct tag_mem32 *tag_mem32)
 
 static
 int atag_arm_load(struct kexec_info *info, unsigned long base,
-   const char *command_line, off_t command_line_len,
-   const char *initrd, off_t initrd_len, off_t initrd_off)
+   const char *command_line, off_t command_line_len, const char *initrd)
 {
struct tag *saved_tags = atag_read_tags();
char *buf;
off_t len;
struct tag *params;
-   uint32_t *initrd_start = NULL;

buf = xmalloc(getpagesize());
if (!buf) {
@@ -251,8 +249,8 @@ int atag_arm_load(struct kexec_info *info, unsigned long 
base,
if (initrd) {
params->hdr.size = tag_size(tag_initrd);
params->hdr.tag = ATAG_INITRD2;
-   initrd_start = &params->u.initrd.start;
-   params->u.initrd.size = initrd_len;
+   params->u.initrd.start = initrd_base;
+   params->u.initrd.size = initrd_size;
params = tag_next(params);
}
 
@@ -272,14 +270,6 @@ int atag_arm_load(struct kexec_info *info, unsigned long 
base,
 
add_segment(info, buf, len, base, len);
 
-   if (initrd) {
-   *initrd_start = locate_hole(info, initrd_len, getpagesize(),
-   initrd_off, ULONG_MAX, INT_MAX);
-   if (*initrd_start == ULONG_MAX)
-   return -1;
-   add_segment(info, initrd, initrd_len, *initrd_start, 
initrd_len);
-   }
-
return 0;
 }
 
@@ -348,6 +338,7 @@ static int setup_dtb_prop(char **bufp, off_t *sizep, int 
parentoffset,
 int zImage_arm_load(int argc, char **argv, const char *buf, off_t len,
struct kexec_info *info)
 {
+   unsigned long page_size = getpagesize();
unsigned long base, kernel_base;
unsigned int atag_offset = 0x1000; /* 4k offset from memory start */
unsigned int extra_size = 0x8000; /* TEXT_OFFSET */
@@ -513,6 +504,14 @@ int zImage_arm_load(int argc, char **argv, const char 
*buf, off_t len,
kernel_mem_size = len + 4;
 
/*
+* If the user didn't specify the size of the image, assume the
+* maximum kernel compression ratio is 4.  Note that we must
+* include space for the compressed image here as well.
+*/
+   if (!kexec_arm_image_size)
+   kexec_arm_image_size = len * 5;
+
+   /*
 * If we are loading a dump capture kernel, we need to update kernel
 * command line and also add some additional segments.
 */
@@ -562,17 +561,28 @@ int zImage_arm_load(int argc, char **argv, const char 
*buf, off_t len,
 
kernel_base = base + extra_size;
 
-   if (kexec_arm_image_size) {
-   /* If the image size was passed as command line argument,
-* use that value for determining the address for initrd,
-* atags and dtb images. page-align the given length.*/
-   initrd_base = kernel_base + _ALIGN(kexec_arm_image_size, 
getpagesize());
-   } else {
-   /* Otherwise, assume the maximum kernel compression ratio
-* is 4, and just to be safe, place ramdisk after that.
-* Note that we must include space for the compressed
-* image here as well. */
-   initrd_base = kernel_base + _ALIGN(len * 5, getpagesize());
+   /*
+* Calculate the minimum address of the initrd, which must be
+* above the memory used by the zImage while it runs.  This
+* needs to be page-size aligned.
+*/
+   initrd_base = kernel_base + _ALIGN(kexec_arm_image_size, page_size);
+
+   if (ramdisk_buf) {
+   /*
+* Find a hole to place the initrd. The crash kernel use
+* fixed address, so no check is ok.
+*/
+   if (!(info->kexec_flags & KEXEC_ON_CRASH)) {
+   initrd_base = locate_hole(info, initrd_size, page_size,
+ initrd_base,
+ ULONG_MAX, INT_MAX);
+   if (initrd_base == UL