Re: [PATCH v11 3/7] crash: add generic infrastructure for crash hotplug support
On 8/30/22 22:26, Baoquan He wrote: On 08/26/22 at 01:37pm, Eric DeVolder wrote: CPU and memory change notifications are received in order to regenerate the elfcorehdr. To support cpu hotplug, a callback is registered to capture the CPUHP_AP_ONLINE_DYN online and offline events via cpuhp_setup_state_nocalls(). To support memory hotplug, a notifier is registered to capture the MEM_ONLINE and MEM_OFFLINE events via register_memory_notifier(). The cpu callback and memory notifiers call handle_hotplug_event() which performs needed tasks and then dispatches the event to the architecture specific arch_crash_handle_hotplug_event(). During the process, the kexec_mutex is held. Signed-off-by: Eric DeVolder --- include/linux/crash_core.h | 8 +++ include/linux/kexec.h | 26 +++ kernel/crash_core.c| 134 + 3 files changed, 168 insertions(+) diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h index de62a722431e..3b99e69b011f 100644 --- a/include/linux/crash_core.h +++ b/include/linux/crash_core.h @@ -84,4 +84,12 @@ int parse_crashkernel_high(char *cmdline, unsigned long long system_ram, int parse_crashkernel_low(char *cmdline, unsigned long long system_ram, unsigned long long *crash_size, unsigned long long *crash_base); +#define KEXEC_CRASH_HP_REMOVE_CPU 0 +#define KEXEC_CRASH_HP_ADD_CPU 1 +#define KEXEC_CRASH_HP_REMOVE_MEMORY 2 ~~ Nitpick: these are not aligned, +#define KEXEC_CRASH_HP_ADD_MEMORY 3 +#define KEXEC_CRASH_HP_INVALID_CPU -1U + +struct kimage; + #endif /* LINUX_CRASH_CORE_H */ diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 4eefa631e0ae..9597b41136ec 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -374,6 +374,13 @@ struct kimage { struct purgatory_info purgatory_info; #endif +#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_MEMORY_HOTPLUG) + bool hotplug_event; + unsigned int offlinecpu; + bool elfcorehdr_index_valid; + int elfcorehdr_index; +#endif + #ifdef CONFIG_IMA_KEXEC /* Virtual address of IMA measurement 
buffer for kexec syscall */ void *ima_buffer; @@ -503,6 +510,25 @@ static inline int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages, g static inline void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages) { } #endif +#ifndef arch_map_crash_pages +static inline void *arch_map_crash_pages(unsigned long paddr, + unsigned long size) +{ + return NULL; +} +#endif + +#ifndef arch_unmap_crash_pages +static inline void arch_unmap_crash_pages(void **ptr) { } +#endif + +#ifndef arch_crash_handle_hotplug_event +static inline void arch_crash_handle_hotplug_event(struct kimage *image, + unsigned int hp_action) +{ +} +#endif + #else /* !CONFIG_KEXEC_CORE */ struct pt_regs; struct task_struct; diff --git a/kernel/crash_core.c b/kernel/crash_core.c index 0f8aa659cca4..455150205ded 100644 --- a/kernel/crash_core.c +++ b/kernel/crash_core.c @@ -11,6 +11,8 @@ #include #include #include +#include +#include #include #include @@ -18,6 +20,7 @@ #include #include "kallsyms_internal.h" +#include "kexec_internal.h" /* vmcoreinfo stuff */ unsigned char *vmcoreinfo_data; @@ -611,3 +614,134 @@ static int __init crash_save_vmcoreinfo_init(void) } subsys_initcall(crash_save_vmcoreinfo_init); + +#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_MEMORY_HOTPLUG) +/* + * To accurately reflect hot un/plug changes, the elfcorehdr (which + * is passed to the crash kernel via the elfcorehdr= parameter) + * must be updated with the new list of CPUs and memories. + * + * In order to make changes to elfcorehdr, two conditions are needed: + * First, the segment containing the elfcorehdr must be large enough + * to permit a growing number of resources. The elfcorehdr memory is + * typically based on CONFIG_NR_CPUS and CONFIG_CRASH_MAX_MEMORY_RANGES. + * Second, purgatory must explicitly exclude the elfcorehdr from the + * list of segments it checks (since the elfcorehdr changes and thus + * would require an update to purgatory itself to update the digest). 
+ */ +static void handle_hotplug_event(unsigned int hp_action, unsigned int cpu) +{ + /* Obtain lock while changing crash information */ + mutex_lock(&kexec_mutex); + + /* Check kdump is loaded */ + if (kexec_crash_image) { + struct kimage *image = kexec_crash_image; + + if (hp_action == KEXEC_CRASH_HP_ADD_CPU || + hp_action == KEXEC_CRASH_HP_REMOVE_CPU) + pr_debug("crash hp: hp_action %u, cpu %u\n", hp_action, cpu); + else + pr_debug("crash hp: hp_action %u", hp_action);
[PATCH v12 5/7] kexec: exclude hot remove cpu from elfcorehdr notes
Due to use of CPUHP_AP_ONLINE_DYN, upon CPU unplug, the CPU is still in the for_each_present_cpu() list when within the handle_hotplug_event(). Thus the CPU must be explicitly excluded when building the new list of CPUs. This change identifies in handle_hotplug_event() the CPU to be excluded, and adds the check for excluding the CPU in crash_prepare_elf64_headers(). Signed-off-by: Eric DeVolder Acked-by: Baoquan He --- kernel/crash_core.c | 15 +++ 1 file changed, 15 insertions(+) diff --git a/kernel/crash_core.c b/kernel/crash_core.c index 4b15d91f0b21..5bc5159d9cb1 100644 --- a/kernel/crash_core.c +++ b/kernel/crash_core.c @@ -366,6 +366,11 @@ int crash_prepare_elf64_headers(struct kimage *image, struct crash_mem *mem, /* Prepare one phdr of type PT_NOTE for each present CPU */ for_each_present_cpu(cpu) { +#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_MEMORY_HOTPLUG) + /* Skip the soon-to-be offlined cpu */ + if (image->hotplug_event && (cpu == image->offlinecpu)) + continue; +#endif phdr->p_type = PT_NOTE; notes_addr = per_cpu_ptr_to_phys(per_cpu_ptr(crash_notes, cpu)); phdr->p_offset = phdr->p_paddr = notes_addr; @@ -682,6 +687,16 @@ static void handle_hotplug_event(unsigned int hp_action, unsigned int cpu) /* Flag to differentiate between normal load and hotplug */ image->hotplug_event = true; + /* +* Due to use of CPUHP_AP_ONLINE_DYN, upon unplug and during +* this callback, the CPU is still in the for_each_present_cpu() +* list. Must explicitly look to exclude this CPU when building +* new list. +*/ + image->offlinecpu = + (hp_action == KEXEC_CRASH_HP_REMOVE_CPU) ? + cpu : KEXEC_CRASH_HP_INVALID_CPU; + /* Now invoke arch-specific update handler */ arch_crash_handle_hotplug_event(image, hp_action); -- 2.31.1 ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
[PATCH v12 1/7] crash: move crash_prepare_elf64_headers
At the outcome of this patch set, the crash_prepare_elf64_headers() is utilized on both the kexec_file_load and kexec_load paths. As such, need to move this function out of kexec_file.c and into a common location crash_core.c. No functionality change. Signed-off-by: Eric DeVolder Acked-by: Baoquan He --- kernel/crash_core.c | 100 kernel/kexec_file.c | 99 --- 2 files changed, 100 insertions(+), 99 deletions(-) diff --git a/kernel/crash_core.c b/kernel/crash_core.c index a0eb4d5cf557..46c160d14045 100644 --- a/kernel/crash_core.c +++ b/kernel/crash_core.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -314,6 +315,105 @@ static int __init parse_crashkernel_dummy(char *arg) } early_param("crashkernel", parse_crashkernel_dummy); +int crash_prepare_elf64_headers(struct crash_mem *mem, int need_kernel_map, + void **addr, unsigned long *sz) +{ + Elf64_Ehdr *ehdr; + Elf64_Phdr *phdr; + unsigned long nr_cpus = num_possible_cpus(), nr_phdr, elf_sz; + unsigned char *buf; + unsigned int cpu, i; + unsigned long long notes_addr; + unsigned long mstart, mend; + + /* extra phdr for vmcoreinfo ELF note */ + nr_phdr = nr_cpus + 1; + nr_phdr += mem->nr_ranges; + + /* +* kexec-tools creates an extra PT_LOAD phdr for kernel text mapping +* area (for example, 8000 - a000 on x86_64). +* I think this is required by tools like gdb. So same physical +* memory will be mapped in two ELF headers. One will contain kernel +* text virtual addresses and other will have __va(physical) addresses. 
+*/ + + nr_phdr++; + elf_sz = sizeof(Elf64_Ehdr) + nr_phdr * sizeof(Elf64_Phdr); + elf_sz = ALIGN(elf_sz, ELF_CORE_HEADER_ALIGN); + + buf = vzalloc(elf_sz); + if (!buf) + return -ENOMEM; + + ehdr = (Elf64_Ehdr *)buf; + phdr = (Elf64_Phdr *)(ehdr + 1); + memcpy(ehdr->e_ident, ELFMAG, SELFMAG); + ehdr->e_ident[EI_CLASS] = ELFCLASS64; + ehdr->e_ident[EI_DATA] = ELFDATA2LSB; + ehdr->e_ident[EI_VERSION] = EV_CURRENT; + ehdr->e_ident[EI_OSABI] = ELF_OSABI; + memset(ehdr->e_ident + EI_PAD, 0, EI_NIDENT - EI_PAD); + ehdr->e_type = ET_CORE; + ehdr->e_machine = ELF_ARCH; + ehdr->e_version = EV_CURRENT; + ehdr->e_phoff = sizeof(Elf64_Ehdr); + ehdr->e_ehsize = sizeof(Elf64_Ehdr); + ehdr->e_phentsize = sizeof(Elf64_Phdr); + + /* Prepare one phdr of type PT_NOTE for each present CPU */ + for_each_present_cpu(cpu) { + phdr->p_type = PT_NOTE; + notes_addr = per_cpu_ptr_to_phys(per_cpu_ptr(crash_notes, cpu)); + phdr->p_offset = phdr->p_paddr = notes_addr; + phdr->p_filesz = phdr->p_memsz = sizeof(note_buf_t); + (ehdr->e_phnum)++; + phdr++; + } + + /* Prepare one PT_NOTE header for vmcoreinfo */ + phdr->p_type = PT_NOTE; + phdr->p_offset = phdr->p_paddr = paddr_vmcoreinfo_note(); + phdr->p_filesz = phdr->p_memsz = VMCOREINFO_NOTE_SIZE; + (ehdr->e_phnum)++; + phdr++; + + /* Prepare PT_LOAD type program header for kernel text region */ + if (need_kernel_map) { + phdr->p_type = PT_LOAD; + phdr->p_flags = PF_R|PF_W|PF_X; + phdr->p_vaddr = (unsigned long) _text; + phdr->p_filesz = phdr->p_memsz = _end - _text; + phdr->p_offset = phdr->p_paddr = __pa_symbol(_text); + ehdr->e_phnum++; + phdr++; + } + + /* Go through all the ranges in mem->ranges[] and prepare phdr */ + for (i = 0; i < mem->nr_ranges; i++) { + mstart = mem->ranges[i].start; + mend = mem->ranges[i].end; + + phdr->p_type = PT_LOAD; + phdr->p_flags = PF_R|PF_W|PF_X; + phdr->p_offset = mstart; + + phdr->p_paddr = mstart; + phdr->p_vaddr = (unsigned long) __va(mstart); + phdr->p_filesz = phdr->p_memsz = mend - mstart + 1; + 
phdr->p_align = 0; + ehdr->e_phnum++; + pr_debug("Crash PT_LOAD ELF header. phdr=%p vaddr=0x%llx, paddr=0x%llx, sz=0x%llx e_phnum=%d p_offset=0x%llx\n", + phdr, phdr->p_vaddr, phdr->p_paddr, phdr->p_filesz, + ehdr->e_phnum, phdr->p_offset); + phdr++; + } + + *addr = buf; + *sz = elf_sz; + return 0; +} + Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type, void *data, size_t data_len) { diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index 1d546dc97c50..8017eeb43036 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @
[PATCH v12 2/7] crash: prototype change for crash_prepare_elf64_headers
From within crash_prepare_elf64_headers() there is a need to reference the struct kimage hotplug members. As such, this change passes the struct kimage as a parameter to the crash_prepare_elf64_headers(). The hotplug members are added in "crash: add generic infrastructure for crash hotplug support". This is preparation for later patch, no functionality change. Signed-off-by: Eric DeVolder Acked-by: Baoquan He Acked-by: David Hildenbrand --- arch/arm64/kernel/machine_kexec_file.c | 6 +++--- arch/powerpc/kexec/file_load_64.c | 2 +- arch/riscv/kernel/elf_kexec.c | 7 --- arch/x86/kernel/crash.c| 2 +- include/linux/kexec.h | 7 +-- kernel/crash_core.c| 4 ++-- 6 files changed, 16 insertions(+), 12 deletions(-) diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c index a11a6e14ba89..2f7b773a83bb 100644 --- a/arch/arm64/kernel/machine_kexec_file.c +++ b/arch/arm64/kernel/machine_kexec_file.c @@ -39,7 +39,7 @@ int arch_kimage_file_post_load_cleanup(struct kimage *image) return kexec_image_post_load_cleanup_default(image); } -static int prepare_elf_headers(void **addr, unsigned long *sz) +static int prepare_elf_headers(struct kimage *image, void **addr, unsigned long *sz) { struct crash_mem *cmem; unsigned int nr_ranges; @@ -64,7 +64,7 @@ static int prepare_elf_headers(void **addr, unsigned long *sz) } /* Exclude crashkernel region */ - ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end); + ret = crash_exclude_mem_range(image, cmem, crashk_res.start, crashk_res.end); if (ret) goto out; @@ -74,7 +74,7 @@ static int prepare_elf_headers(void **addr, unsigned long *sz) goto out; } - ret = crash_prepare_elf64_headers(cmem, true, addr, sz); + ret = crash_prepare_elf64_headers(image, cmem, true, addr, sz); out: kfree(cmem); diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c index 349a781cea0b..a0af9966a8f0 100644 --- a/arch/powerpc/kexec/file_load_64.c +++ b/arch/powerpc/kexec/file_load_64.c 
@@ -798,7 +798,7 @@ static int load_elfcorehdr_segment(struct kimage *image, struct kexec_buf *kbuf) goto out; /* Setup elfcorehdr segment */ - ret = crash_prepare_elf64_headers(cmem, false, &headers, &headers_sz); + ret = crash_prepare_elf64_headers(image, cmem, false, &headers, &headers_sz); if (ret) { pr_err("Failed to prepare elf headers for the core\n"); goto out; diff --git a/arch/riscv/kernel/elf_kexec.c b/arch/riscv/kernel/elf_kexec.c index 0cb94992c15b..ffde73228108 100644 --- a/arch/riscv/kernel/elf_kexec.c +++ b/arch/riscv/kernel/elf_kexec.c @@ -118,7 +118,8 @@ static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg) return 0; } -static int prepare_elf_headers(void **addr, unsigned long *sz) +static int prepare_elf_headers(struct kimage *image, + void **addr, unsigned long *sz) { struct crash_mem *cmem; unsigned int nr_ranges; @@ -140,7 +141,7 @@ static int prepare_elf_headers(void **addr, unsigned long *sz) /* Exclude crashkernel region */ ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end); if (!ret) - ret = crash_prepare_elf64_headers(cmem, true, addr, sz); + ret = crash_prepare_elf64_headers(image, cmem, true, addr, sz); out: kfree(cmem); @@ -212,7 +213,7 @@ static void *elf_kexec_load(struct kimage *image, char *kernel_buf, /* Add elfcorehdr */ if (image->type == KEXEC_TYPE_CRASH) { - ret = prepare_elf_headers(&headers, &headers_sz); + ret = prepare_elf_headers(image, &headers, &headers_sz); if (ret) { pr_err("Preparing elf core header failed\n"); goto out; diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 9730c88530fc..9ceb93c176a6 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -265,7 +265,7 @@ static int prepare_elf_headers(struct kimage *image, void **addr, goto out; /* By default prepare 64bit headers */ - ret = crash_prepare_elf64_headers(cmem, IS_ENABLED(CONFIG_X86_64), addr, sz); + ret = crash_prepare_elf64_headers(image, cmem, IS_ENABLED(CONFIG_X86_64), addr, 
sz); out: vfree(cmem); diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 13e6c4b58f07..4eefa631e0ae 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -253,8 +253,11 @@ struct crash_mem { extern int crash_exclude_mem_range(struct crash_mem *mem, unsigned long long mstart, unsigned long long mend); -extern int crash_prepare_
[PATCH v12 3/7] crash: add generic infrastructure for crash hotplug support
CPU and memory change notifications are received in order to regenerate the elfcorehdr. To support cpu hotplug, a callback is registered to capture the CPUHP_AP_ONLINE_DYN online and offline events via cpuhp_setup_state_nocalls(). To support memory hotplug, a notifier is registered to capture the MEM_ONLINE and MEM_OFFLINE events via register_memory_notifier(). The cpu callback and memory notifiers call handle_hotplug_event() which performs needed tasks and then dispatches the event to the architecture specific arch_crash_handle_hotplug_event(). During the process, the kexec_mutex is held. Signed-off-by: Eric DeVolder Acked-by: Baoquan He --- include/linux/crash_core.h | 8 +++ include/linux/kexec.h | 26 +++ kernel/crash_core.c| 134 + 3 files changed, 168 insertions(+) diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h index de62a722431e..a270f8660538 100644 --- a/include/linux/crash_core.h +++ b/include/linux/crash_core.h @@ -84,4 +84,12 @@ int parse_crashkernel_high(char *cmdline, unsigned long long system_ram, int parse_crashkernel_low(char *cmdline, unsigned long long system_ram, unsigned long long *crash_size, unsigned long long *crash_base); +#define KEXEC_CRASH_HP_REMOVE_CPU 0 +#define KEXEC_CRASH_HP_ADD_CPU 1 +#define KEXEC_CRASH_HP_REMOVE_MEMORY 2 +#define KEXEC_CRASH_HP_ADD_MEMORY 3 +#define KEXEC_CRASH_HP_INVALID_CPU -1U + +struct kimage; + #endif /* LINUX_CRASH_CORE_H */ diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 4eefa631e0ae..9597b41136ec 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -374,6 +374,13 @@ struct kimage { struct purgatory_info purgatory_info; #endif +#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_MEMORY_HOTPLUG) + bool hotplug_event; + unsigned int offlinecpu; + bool elfcorehdr_index_valid; + int elfcorehdr_index; +#endif + #ifdef CONFIG_IMA_KEXEC /* Virtual address of IMA measurement buffer for kexec syscall */ void *ima_buffer; @@ -503,6 +510,25 @@ static inline int 
arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages, g static inline void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages) { } #endif +#ifndef arch_map_crash_pages +static inline void *arch_map_crash_pages(unsigned long paddr, + unsigned long size) +{ + return NULL; +} +#endif + +#ifndef arch_unmap_crash_pages +static inline void arch_unmap_crash_pages(void **ptr) { } +#endif + +#ifndef arch_crash_handle_hotplug_event +static inline void arch_crash_handle_hotplug_event(struct kimage *image, + unsigned int hp_action) +{ +} +#endif + #else /* !CONFIG_KEXEC_CORE */ struct pt_regs; struct task_struct; diff --git a/kernel/crash_core.c b/kernel/crash_core.c index 8c648fd5897a..4b15d91f0b21 100644 --- a/kernel/crash_core.c +++ b/kernel/crash_core.c @@ -11,6 +11,8 @@ #include #include #include +#include +#include #include #include @@ -18,6 +20,7 @@ #include #include "kallsyms_internal.h" +#include "kexec_internal.h" /* vmcoreinfo stuff */ unsigned char *vmcoreinfo_data; @@ -612,3 +615,134 @@ static int __init crash_save_vmcoreinfo_init(void) } subsys_initcall(crash_save_vmcoreinfo_init); + +#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_MEMORY_HOTPLUG) +/* + * To accurately reflect hot un/plug changes, the elfcorehdr (which + * is passed to the crash kernel via the elfcorehdr= parameter) + * must be updated with the new list of CPUs and memories. + * + * In order to make changes to elfcorehdr, two conditions are needed: + * First, the segment containing the elfcorehdr must be large enough + * to permit a growing number of resources. The elfcorehdr memory is + * typically based on CONFIG_NR_CPUS and CONFIG_CRASH_MAX_MEMORY_RANGES. + * Second, purgatory must explicitly exclude the elfcorehdr from the + * list of segments it checks (since the elfcorehdr changes and thus + * would require an update to purgatory itself to update the digest). 
+ */ +static void handle_hotplug_event(unsigned int hp_action, unsigned int cpu) +{ + /* Obtain lock while changing crash information */ + mutex_lock(&kexec_mutex); + + /* Check kdump is loaded */ + if (kexec_crash_image) { + struct kimage *image = kexec_crash_image; + + if (hp_action == KEXEC_CRASH_HP_ADD_CPU || + hp_action == KEXEC_CRASH_HP_REMOVE_CPU) + pr_debug("crash hp: hp_action %u, cpu %u\n", hp_action, cpu); + else + pr_debug("crash hp: hp_action %u\n", hp_action); + + /* +* When the struct kimage is allocated, it is wiped to zero, so +* the elfcorehdr_index_valid defaults to false. Find the +* segment containing the elfcorehdr, if not already
[PATCH v12 6/7] crash: memory and cpu hotplug sysfs attributes
This introduces the crash_hotplug attribute for memory and CPUs for use by userspace. This change directly facilitates the udev rule for managing userspace re-loading of the crash kernel upon hot un/plug changes. For memory, this changeset introduces the crash_hotplug attribute to the /sys/devices/system/memory directory. For example: # udevadm info --attribute-walk /sys/devices/system/memory/memory81 looking at device '/devices/system/memory/memory81': KERNEL=="memory81" SUBSYSTEM=="memory" DRIVER=="" ATTR{online}=="1" ATTR{phys_device}=="0" ATTR{phys_index}=="0051" ATTR{removable}=="1" ATTR{state}=="online" ATTR{valid_zones}=="Movable" looking at parent device '/devices/system/memory': KERNELS=="memory" SUBSYSTEMS=="" DRIVERS=="" ATTRS{auto_online_blocks}=="offline" ATTRS{block_size_bytes}=="800" ATTRS{crash_hotplug}=="1" For CPUs, this changeset introduces the crash_hotplug attribute to the /sys/devices/system/cpu directory. For example: # udevadm info --attribute-walk /sys/devices/system/cpu/cpu0 looking at device '/devices/system/cpu/cpu0': KERNEL=="cpu0" SUBSYSTEM=="cpu" DRIVER=="processor" ATTR{crash_notes}=="277c38600" ATTR{crash_notes_size}=="368" ATTR{online}=="1" looking at parent device '/devices/system/cpu': KERNELS=="cpu" SUBSYSTEMS=="" DRIVERS=="" ATTRS{crash_hotplug}=="1" ATTRS{isolated}=="" ATTRS{kernel_max}=="8191" ATTRS{nohz_full}==" (null)" ATTRS{offline}=="4-7" ATTRS{online}=="0-3" ATTRS{possible}=="0-7" ATTRS{present}=="0-3" With these sysfs attributes in place, it is possible to efficiently instruct the udev rule to skip crash kernel reloading. 
For example, the following is the proposed udev rule change for RHEL system 98-kexec.rules (as the first lines of the rule file): # The kernel handles updates to crash elfcorehdr for cpu and memory changes SUBSYSTEM=="cpu", ATTRS{crash_hotplug}=="1", GOTO="kdump_reload_end" SUBSYSTEM=="memory", ATTRS{crash_hotplug}=="1", GOTO="kdump_reload_end" When examined in the context of 98-kexec.rules, the above change tests if crash_hotplug is set, and if so, it skips the userspace initiated unload-then-reload of the crash kernel. Cpu and memory checks are separated in accordance with CONFIG_HOTPLUG_CPU and CONFIG_MEMORY_HOTPLUG kernel config options. If an architecture supports, for example, memory hotplug but not CPU hotplug, then the /sys/devices/system/memory/crash_hotplug attribute file is present, but the /sys/devices/system/cpu/crash_hotplug attribute file will NOT be present. Thus the udev rule will skip userspace processing of memory hot un/plug events, but the udev rule will fail for CPU events, thus allowing userspace to process cpu hot un/plug events (ie the unload-then-reload of the kdump capture kernel). Signed-off-by: Eric DeVolder Acked-by: Baoquan He --- .../admin-guide/mm/memory-hotplug.rst | 8 Documentation/core-api/cpu_hotplug.rst | 18 ++ drivers/base/cpu.c | 14 ++ drivers/base/memory.c | 13 + include/linux/kexec.h | 8 5 files changed, 61 insertions(+) diff --git a/Documentation/admin-guide/mm/memory-hotplug.rst b/Documentation/admin-guide/mm/memory-hotplug.rst index a3c9e8ad8fa0..15fd1751a63c 100644 --- a/Documentation/admin-guide/mm/memory-hotplug.rst +++ b/Documentation/admin-guide/mm/memory-hotplug.rst @@ -293,6 +293,14 @@ The following files are currently defined: Availability depends on the CONFIG_ARCH_MEMORY_PROBE kernel configuration option. ``uevent``read-write: generic udev file for device subsystems. 
+``crash_hotplug`` read-only: when changes to the system memory map + occur due to hot un/plug of memory, this file contains + '1' if the kernel updates the kdump capture kernel memory + map itself (via elfcorehdr), or '0' if userspace must update + the kdump capture kernel memory map. + + Availability depends on the CONFIG_MEMORY_HOTPLUG kernel + configuration option. == = .. note:: diff --git a/Documentation/core-api/cpu_hotplug.rst b/Documentation/core-api/cpu_hotplug.rst index c6f4ba2fb32d..13e33d098645 100644 --- a/Documentation/core-api/cpu_hotplug.rst +++ b/Documentation/core-api/cpu_hotplug.rst @@ -750,6 +750,24 @@ will receive all events. A script like:: can process the event further. +When changes to the CPUs in the system occur, the sysfs file +/sys/devices/system/cpu/crash_hotplug contains '1' if the kernel +updates the kdump capture kernel list of CPUs itself (via elfcorehdr), +or '0' if userspace must update the kdump
[PATCH v12 7/7] x86/crash: Add x86 crash hotplug support
For x86_64, when CPU or memory is hot un/plugged, the crash elfcorehdr, which describes the CPUs and memory in the system, must also be updated. When loading the crash kernel via kexec_load or kexec_file_load, the elfcorehdr is identified at run time in crash_core:handle_hotplug_event(). To update the elfcorehdr for x86_64, a new elfcorehdr must be generated from the available CPUs and memory. The new elfcorehdr is prepared into a buffer, and then installed over the top of the existing elfcorehdr. In the patch 'kexec: exclude elfcorehdr from the segment digest' the need to update purgatory due to the change in elfcorehdr was eliminated. As a result, no changes to purgatory or boot_params (as the elfcorehdr= kernel command line parameter pointer remains unchanged and correct) are needed, just elfcorehdr. To accommodate a growing number of resources via hotplug, the elfcorehdr segment must be sufficiently large enough to accommodate changes, see the CRASH_MAX_MEMORY_RANGES configure item. With this change, crash hotplug for kexec_file_load syscall is supported. The kexec_load is also supported, but also requires a corresponding change to userspace kexec-tools. Signed-off-by: Eric DeVolder Acked-by: Baoquan He --- arch/x86/Kconfig | 11 arch/x86/include/asm/kexec.h | 20 +++ arch/x86/kernel/crash.c | 102 +++ 3 files changed, 133 insertions(+) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index f9920f1341c8..cdfc9b2fdf98 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2056,6 +2056,17 @@ config CRASH_DUMP (CONFIG_RELOCATABLE=y). For more details see Documentation/admin-guide/kdump/kdump.rst +config CRASH_MAX_MEMORY_RANGES + depends on CRASH_DUMP && KEXEC_FILE && (HOTPLUG_CPU || MEMORY_HOTPLUG) + int + default 32768 + help + For the kexec_file_load path, specify the maximum number of + memory regions, eg. as represented by the 'System RAM' entries + in /proc/iomem, that the elfcorehdr buffer/segment can accommodate. 
+ This value is combined with NR_CPUS and multiplied by Elf64_Phdr + size to determine the final buffer size. + config KEXEC_JUMP bool "kexec jump" depends on KEXEC && HIBERNATION diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h index a3760ca796aa..432073385b2d 100644 --- a/arch/x86/include/asm/kexec.h +++ b/arch/x86/include/asm/kexec.h @@ -212,6 +212,26 @@ typedef void crash_vmclear_fn(void); extern crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss; extern void kdump_nmi_shootdown_cpus(void); +void *arch_map_crash_pages(unsigned long paddr, unsigned long size); +#define arch_map_crash_pages arch_map_crash_pages + +void arch_unmap_crash_pages(void **ptr); +#define arch_unmap_crash_pages arch_unmap_crash_pages + +void arch_crash_handle_hotplug_event(struct kimage *image, + unsigned int hp_action); +#define arch_crash_handle_hotplug_event arch_crash_handle_hotplug_event + +#ifdef CONFIG_HOTPLUG_CPU +static inline int crash_hotplug_cpu_support(void) { return 1; } +#define crash_hotplug_cpu_support crash_hotplug_cpu_support +#endif + +#ifdef CONFIG_MEMORY_HOTPLUG +static inline int crash_hotplug_memory_support(void) { return 1; } +#define crash_hotplug_memory_support crash_hotplug_memory_support +#endif + #endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_KEXEC_H */ diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 9ceb93c176a6..8fc7d678ac72 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -397,7 +398,18 @@ int crash_load_segments(struct kimage *image) image->elf_headers = kbuf.buffer; image->elf_headers_sz = kbuf.bufsz; +#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_MEMORY_HOTPLUG) + /* Ensure elfcorehdr segment large enough for hotplug changes */ + kbuf.memsz = + (CONFIG_NR_CPUS_DEFAULT + CONFIG_CRASH_MAX_MEMORY_RANGES) * + sizeof(Elf64_Phdr); + /* Mark as usable to crash kernel, else crash kernel fails on boot */ + 
image->elf_headers_sz = kbuf.memsz; + image->elfcorehdr_index = image->nr_segments; + image->elfcorehdr_index_valid = true; +#else kbuf.memsz = kbuf.bufsz; +#endif kbuf.buf_align = ELF_CORE_HEADER_ALIGN; kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; ret = kexec_add_buffer(&kbuf); @@ -412,3 +424,93 @@ int crash_load_segments(struct kimage *image) return ret; } #endif /* CONFIG_KEXEC_FILE */ + +#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_MEMORY_HOTPLUG) +/* + * NOTE: The addresses and sizes passed to this routine have + * already been fully aligned on page boundaries. There is no + * need for massaging the address or size. + */ +void *arch_map_crash_pages(unsigned long paddr, unsigned long size) +{ + void *ptr =
[PATCH v12 4/7] kexec: exclude elfcorehdr from the segment digest
When a crash kernel is loaded via the kexec_file_load syscall, the kernel places the various segments (ie crash kernel, crash initrd, boot_params, elfcorehdr, purgatory, etc) in memory. For those architectures that utilize purgatory, a hash digest of the segments is calculated for integrity checking. This digest is embedded into the purgatory image prior to placing purgatory in memory. Since hotplug events cause changes to the elfcorehdr, purgatory integrity checking fails (at crash time, and no kdump created). As a result, this change explicitly excludes the elfcorehdr segment from the list of segments used to create the digest. By doing so, this permits changes to the elfcorehdr in response to hotplug events, without having to also reload purgatory due to the change to the digest. Signed-off-by: Eric DeVolder Acked-by: Baoquan He --- kernel/kexec_file.c | 6 ++ 1 file changed, 6 insertions(+) diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index 8017eeb43036..d0c2661b3509 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -723,6 +723,12 @@ static int kexec_calculate_store_digests(struct kimage *image) for (j = i = 0; i < image->nr_segments; i++) { struct kexec_segment *ksegment; +#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_MEMORY_HOTPLUG) + /* Exclude elfcorehdr segment to allow future changes via hotplug */ + if (image->elfcorehdr_index_valid && (j == image->elfcorehdr_index)) + continue; +#endif + ksegment = &image->segment[i]; /* * Skip purgatory as it will be modified once we put digest -- 2.31.1 ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
[PATCH v12 0/7] crash: Kernel handling of CPU and memory hot un/plug
When the kdump service is loaded, if a CPU or memory is hot un/plugged, the crash elfcorehdr, which describes the CPUs and memory in the system, must also be updated, else the resulting vmcore is inaccurate (eg. missing either CPU context or memory regions). The current solution utilizes udev to initiate an unload-then-reload of the kdump image (eg. kernel, initrd, boot_params, purgatory and elfcorehdr) by the userspace kexec utility. In previous posts I have outlined the significant performance problems related to offloading this activity to userspace. This patchset introduces a generic crash hot un/plug handler that registers with the CPU and memory notifiers. Upon CPU or memory changes, this generic handler is invoked and performs important housekeeping, for example obtaining the appropriate lock, and then invokes an architecture specific handler to do the appropriate updates. In the case of x86_64, the arch specific handler generates a new elfcorehdr, and overwrites the old one in memory. No involvement with userspace needed. To realize the benefits/test this patchset, one must make a couple of minor changes to userspace: - Prevent udev from updating kdump crash kernel on hot un/plug changes. Add the following as the first lines to the udev rule file /usr/lib/udev/rules.d/98-kexec.rules: # The kernel handles updates to crash elfcorehdr for cpu and memory changes SUBSYSTEM=="cpu", ATTRS{crash_hotplug}=="1", GOTO="kdump_reload_end" SUBSYSTEM=="memory", ATTRS{crash_hotplug}=="1", GOTO="kdump_reload_end" These lines will cause cpu and memory hot un/plug events to be skipped within this rule file, if the kernel has these changes enabled. - Change to the kexec_file_load for loading the kdump kernel: Eg. on RHEL: in /usr/bin/kdumpctl, change to: standard_kexec_args="-p -d -s" which adds the -s to select kexec_file_load syscall. 
This patchset supports kexec_load with a modified kexec userspace utility, and a working changeset to the kexec userspace utility is provided here (and to use, the above change to standard_kexec_args would be, for example, to append --hotplug instead of -s). diff --git a/kexec/arch/i386/crashdump-x86.c b/kexec/arch/i386/crashdump-x86.c index 9826f6d..4ed395a 100644 --- a/kexec/arch/i386/crashdump-x86.c +++ b/kexec/arch/i386/crashdump-x86.c @@ -48,6 +48,7 @@ #include extern struct arch_options_t arch_options; +extern int do_hotplug; static int get_kernel_page_offset(struct kexec_info *UNUSED(info), struct crash_elf_info *elf_info) @@ -975,6 +976,14 @@ int load_crashdump_segments(struct kexec_info *info, char* mod_cmdline, } else { memsz = bufsz; } + + /* If hotplug support enabled, use larger size to accomodate changes */ + if (do_hotplug) { + long int nr_cpus = get_nr_cpus(); + memsz = (nr_cpus + CRASH_MAX_MEMORY_RANGES) * sizeof(Elf64_Phdr); + } + +info->elfcorehdr = elfcorehdr = add_buffer(info, tmp, bufsz, memsz, align, min_base, max_addr, -1); dbgprintf("Created elf header segment at 0x%lx\n", elfcorehdr); diff --git a/kexec/crashdump-elf.c b/kexec/crashdump-elf.c index b8bb686..5e29f7a 100644 --- a/kexec/crashdump-elf.c +++ b/kexec/crashdump-elf.c @@ -43,11 +43,7 @@ int FUNC(struct kexec_info *info, int (*get_note_info)(int cpu, uint64_t *addr, uint64_t *len); long int count_cpu; - if (xen_present()) - nr_cpus = xen_get_nr_phys_cpus(); - else - nr_cpus = sysconf(_SC_NPROCESSORS_CONF); - + nr_cpus = get_nr_cpus(); if (nr_cpus < 0) { return -1; } diff --git a/kexec/crashdump.h b/kexec/crashdump.h index 18bd691..28d3278 100644 --- a/kexec/crashdump.h +++ b/kexec/crashdump.h @@ -57,7 +57,6 @@ unsigned long phys_to_virt(struct crash_elf_info *elf_info, unsigned long long paddr); unsigned long xen_architecture(struct crash_elf_info *elf_info); -int xen_get_nr_phys_cpus(void); int xen_get_note(int cpu, uint64_t *addr, uint64_t *len); int 
xen_get_crashkernel_region(uint64_t *start, uint64_t *end); diff --git a/kexec/kexec-xen.h b/kexec/kexec-xen.h index 70fb576..f54a2dd 100644 --- a/kexec/kexec-xen.h +++ b/kexec/kexec-xen.h @@ -83,5 +83,6 @@ extern int __xc_interface_close(xc_interface *xch); #endif int xen_get_kexec_range(int range, uint64_t *start, uint64_t *end); +int xen_get_nr_phys_cpus(void); #endif /* KEXEC_XEN_H */ diff --git a/kexec/kexec.c b/kexec/kexec.c index 829a6ea..3668b73 100644 --- a/kexec/kexec.c +++ b/kexec/kexec.c @@ -58,6 +58,7 @@ unsigned long long mem_min = 0; unsigned long long mem_max = ULONG_MAX; +int do_hotplug = 0; static unsigned long kexec_flags = 0; /* Flags for kexec file
Re: [PATCH v2 1/3] LoongArch: Add kexec support
Hi, Huacai On 09/09/2022 04:16 PM, Huacai Chen wrote: Hi, Youling, On Fri, Sep 9, 2022 at 11:20 AM Youling Tang wrote: Add three new files, kexec.h, machine_kexec.c and relocate_kernel.S to the LoongArch architecture that add support for the kexec re-boot mechanism (CONFIG_KEXEC) on LoongArch platforms. Supports loading vmlinux (vmlinux.elf) in ELF format and vmlinux.efi in PE format. I tested this on LoongArch 3A5000 machine and works as expected, $ sudo kexec -l /boot/vmlinux.efi --reuse-cmdline $ sudo kexec -e Signed-off-by: Youling Tang --- arch/loongarch/Kconfig | 11 ++ arch/loongarch/include/asm/kexec.h | 58 arch/loongarch/kernel/Makefile | 2 + arch/loongarch/kernel/head.S| 7 +- arch/loongarch/kernel/machine_kexec.c | 188 arch/loongarch/kernel/relocate_kernel.S | 106 + 6 files changed, 371 insertions(+), 1 deletion(-) create mode 100644 arch/loongarch/include/asm/kexec.h create mode 100644 arch/loongarch/kernel/machine_kexec.c create mode 100644 arch/loongarch/kernel/relocate_kernel.S diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 9b1f2ab878e9..08e063aaf847 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -422,6 +422,17 @@ config ARCH_IOREMAP protection support. However, you can enable LoongArch DMW-based ioremap() for better performance. +config KEXEC + bool "Kexec system call" + select KEXEC_CORE + help + kexec is a system call that implements the ability to shutdown your + current kernel, and to start another kernel. It is like a reboot + but it is independent of the system firmware. And like a reboot + you can start any kernel with it, not just Linux. + + The name comes from the similarity to the exec system call. 
+ config SECCOMP bool "Enable seccomp to safely compute untrusted bytecode" depends on PROC_FS diff --git a/arch/loongarch/include/asm/kexec.h b/arch/loongarch/include/asm/kexec.h new file mode 100644 index ..f23506725e00 --- /dev/null +++ b/arch/loongarch/include/asm/kexec.h @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * kexec.h for kexec + * + * Copyright (C) 2022 Loongson Technology Corporation Limited + */ + +#ifndef _ASM_KEXEC_H +#define _ASM_KEXEC_H + +#include +#include + +/* Maximum physical address we can use pages from */ +#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL) +/* Maximum address we can reach in physical address mode */ +#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL) + /* Maximum address we can use for the control code buffer */ +#define KEXEC_CONTROL_MEMORY_LIMIT (-1UL) + +/* Reserve a page for the control code buffer */ +#define KEXEC_CONTROL_PAGE_SIZE PAGE_SIZE + +/* The native architecture */ +#define KEXEC_ARCH KEXEC_ARCH_LOONGARCH + +static inline void crash_setup_regs(struct pt_regs *newregs, + struct pt_regs *oldregs) +{ + if (oldregs) + memcpy(newregs, oldregs, sizeof(*newregs)); + else + prepare_frametrace(newregs); +} + +#define ARCH_HAS_KIMAGE_ARCH + +struct kimage_arch { + unsigned long boot_flag; + unsigned long fdt_addr; +}; I prefer to change boot_flag to efi_boot, the latter is better to correspond the current usage, and keeps consistency with efistub. OK. + +typedef void (*do_kexec_t)(unsigned long boot_flag, + unsigned long fdt_addr, + unsigned long first_ind_entry, + unsigned long jump_addr); I prefer change the order of jump_addr and first_ind_entry here. OK. 
+ +struct kimage; +extern const unsigned char relocate_new_kernel[]; +extern const size_t relocate_new_kernel_size; +extern void kexec_reboot(void); + +#ifdef CONFIG_SMP +extern atomic_t kexec_ready_to_reboot; +extern const unsigned char kexec_smp_wait[]; +#endif + +#endif /* !_ASM_KEXEC_H */ diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile index 7225916dd378..17dc8ce6b5ce 100644 --- a/arch/loongarch/kernel/Makefile +++ b/arch/loongarch/kernel/Makefile @@ -17,6 +17,8 @@ obj-$(CONFIG_CPU_HAS_FPU) += fpu.o obj-$(CONFIG_MODULES) += module.o module-sections.o obj-$(CONFIG_STACKTRACE) += stacktrace.o +obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o + obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_SMP) += smp.o diff --git a/arch/loongarch/kernel/head.S b/arch/loongarch/kernel/head.S index eb3f641d5915..0f786d670e66 100644 --- a/arch/loongarch/kernel/head.S +++ b/arch/loongarch/kernel/head.S @@ -20,7 +20,12 @@ _head: .word MZ_MAGIC/* "MZ", MS-DOS header */ - .org0x3c/* 0x04 ~ 0x3b reserved */ + .org0x8 + .quad 0 /* Image load offset from start of RAM */ +
Re: [PATCH v2 2/3] LoongArch: Add kdump support
Hi, Huacai On 09/09/2022 04:15 PM, Huacai Chen wrote: Hi, Youling, On Fri, Sep 9, 2022 at 11:20 AM Youling Tang wrote: This patch adds support for kdump, the kernel will reserve a region for the crash kernel and jump there on panic. Arch-specific functions are added to allow for implementing a crash dump file interface, /proc/vmcore, which can be viewed as an ELF file. A user space tool, like kexec-tools, is responsible for allocating a separate region for the core's ELF header within crash kdump kernel memory and filling it in when executing kexec_load(). Then, its location will be advertised to crash dump kernel via a new device-tree property, "linux,elfcorehdr", and crash dump kernel preserves the region for later use with fdt_reserve_elfcorehdr() at boot time. At the same time, it will also limit the crash kdump kernel to the crashkernel area via a new device-tree property, "linux,usable-memory-range", so as not to destroy the original kernel dump data. On crash dump kernel, /proc/vmcore will access the primary kernel's memory with copy_oldmem_page(). 
I tested this on LoongArch 3A5000 machine and works as expected (Suggest crashkernel parameter is "crashkernel=512M@2560M"), you may test it by triggering a crash through /proc/sysrq_trigger: $ sudo kexec -p /boot/vmlinux-kdump --reuse-cmdline --append="nr_cpus=1" # echo c > /proc/sysrq_trigger Signed-off-by: Youling Tang --- arch/loongarch/Kconfig | 22 ++ arch/loongarch/Makefile | 4 ++ arch/loongarch/kernel/Makefile | 1 + arch/loongarch/kernel/crash_dump.c | 19 + arch/loongarch/kernel/machine_kexec.c | 95 +++-- arch/loongarch/kernel/mem.c | 6 ++ arch/loongarch/kernel/relocate_kernel.S | 6 ++ arch/loongarch/kernel/setup.c | 49 + arch/loongarch/kernel/traps.c | 4 ++ 9 files changed, 200 insertions(+), 6 deletions(-) create mode 100644 arch/loongarch/kernel/crash_dump.c diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 08e063aaf847..4bf888c1 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -433,6 +433,28 @@ config KEXEC The name comes from the similarity to the exec system call. +config CRASH_DUMP + bool "Build kdump crash kernel" + help + Generate crash dump after being started by kexec. This should + be normally only set in special crash dump kernels which are + loaded in the main kernel with kexec-tools into a specially + reserved region and then later executed after a crash by + kdump/kexec. + + For more details see Documentation/admin-guide/kdump/kdump.rst + +config PHYSICAL_START + hex "Physical address where the kernel is loaded" + default "0x9000a000" if 64BIT + depends on CRASH_DUMP + help + This gives the XKPRANGE address where the kernel is loaded. + If you plan to use kernel for capturing the crash dump change + this value to start of the reserved region (the "X" value as + specified in the "crashkernel=YM@XM" command line boot parameter + passed to the panic-ed kernel). 
+ config SECCOMP bool "Enable seccomp to safely compute untrusted bytecode" depends on PROC_FS diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index 69b39ba3a09d..224274c1644e 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -66,7 +66,11 @@ endif cflags-y += -ffreestanding cflags-y += $(call cc-option, -mno-check-zero-division) +ifdef CONFIG_PHYSICAL_START +load-y = $(CONFIG_PHYSICAL_START) +else load-y = 0x9020 +endif bootvars-y = VMLINUX_LOAD_ADDRESS=$(load-y) drivers-$(CONFIG_PCI) += arch/loongarch/pci/ diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile index 17dc8ce6b5ce..79eee7db1414 100644 --- a/arch/loongarch/kernel/Makefile +++ b/arch/loongarch/kernel/Makefile @@ -18,6 +18,7 @@ obj-$(CONFIG_MODULES) += module.o module-sections.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o +obj-$(CONFIG_CRASH_DUMP)+= crash_dump.o obj-$(CONFIG_PROC_FS) += proc.o diff --git a/arch/loongarch/kernel/crash_dump.c b/arch/loongarch/kernel/crash_dump.c new file mode 100644 index ..13e5d2f7870d --- /dev/null +++ b/arch/loongarch/kernel/crash_dump.c @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include + +ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn, +size_t csize, unsigned long offset) +{ + void *vaddr; + + if (!csize) + return 0; + + vaddr = kmap_local_pfn(pfn); + csize = copy_to_iter(vaddr + offset, csize, iter); + kunmap_local(vaddr); + + return csize; +} diff --git a/arch/loongarch/kernel/machin
Re: [PATCH v2 1/3] LoongArch: Add kexec support
Hi, Youling, On Fri, Sep 9, 2022 at 11:20 AM Youling Tang wrote: > > Add three new files, kexec.h, machine_kexec.c and relocate_kernel.S to the > LoongArch architecture that add support for the kexec re-boot mechanis > (CONFIG_KEXEC) on LoongArch platforms. > > Supports loading vmlinux (vmlinux.elf) in ELF format and vmlinux.efi in > PE format. > > I tested this on LoongArch 3A5000 machine and works as expected, > > $ sudo kexec -l /boot/vmlinux.efi --reuse-cmdline > $ sudo kexec -e > > Signed-off-by: Youling Tang > --- > arch/loongarch/Kconfig | 11 ++ > arch/loongarch/include/asm/kexec.h | 58 > arch/loongarch/kernel/Makefile | 2 + > arch/loongarch/kernel/head.S| 7 +- > arch/loongarch/kernel/machine_kexec.c | 188 > arch/loongarch/kernel/relocate_kernel.S | 106 + > 6 files changed, 371 insertions(+), 1 deletion(-) > create mode 100644 arch/loongarch/include/asm/kexec.h > create mode 100644 arch/loongarch/kernel/machine_kexec.c > create mode 100644 arch/loongarch/kernel/relocate_kernel.S > > diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig > index 9b1f2ab878e9..08e063aaf847 100644 > --- a/arch/loongarch/Kconfig > +++ b/arch/loongarch/Kconfig > @@ -422,6 +422,17 @@ config ARCH_IOREMAP > protection support. However, you can enable LoongArch DMW-based > ioremap() for better performance. > > +config KEXEC > + bool "Kexec system call" > + select KEXEC_CORE > + help > + kexec is a system call that implements the ability to shutdown your > + current kernel, and to start another kernel. It is like a reboot > + but it is independent of the system firmware. And like a reboot > + you can start any kernel with it, not just Linux. > + > + The name comes from the similarity to the exec system call. 
> + > config SECCOMP > bool "Enable seccomp to safely compute untrusted bytecode" > depends on PROC_FS > diff --git a/arch/loongarch/include/asm/kexec.h > b/arch/loongarch/include/asm/kexec.h > new file mode 100644 > index ..f23506725e00 > --- /dev/null > +++ b/arch/loongarch/include/asm/kexec.h > @@ -0,0 +1,58 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > +/* > + * kexec.h for kexec > + * > + * Copyright (C) 2022 Loongson Technology Corporation Limited > + */ > + > +#ifndef _ASM_KEXEC_H > +#define _ASM_KEXEC_H > + > +#include > +#include > + > +/* Maximum physical address we can use pages from */ > +#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL) > +/* Maximum address we can reach in physical address mode */ > +#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL) > + /* Maximum address we can use for the control code buffer */ > +#define KEXEC_CONTROL_MEMORY_LIMIT (-1UL) > + > +/* Reserve a page for the control code buffer */ > +#define KEXEC_CONTROL_PAGE_SIZE PAGE_SIZE > + > +/* The native architecture */ > +#define KEXEC_ARCH KEXEC_ARCH_LOONGARCH > + > +static inline void crash_setup_regs(struct pt_regs *newregs, > + struct pt_regs *oldregs) > +{ > + if (oldregs) > + memcpy(newregs, oldregs, sizeof(*newregs)); > + else > + prepare_frametrace(newregs); > +} > + > +#define ARCH_HAS_KIMAGE_ARCH > + > +struct kimage_arch { > + unsigned long boot_flag; > + unsigned long fdt_addr; > +}; I prefer to change boot_flag to efi_boot, the latter is better to correspond the current usage, and keeps consistency with efistub. > + > +typedef void (*do_kexec_t)(unsigned long boot_flag, > + unsigned long fdt_addr, > + unsigned long first_ind_entry, > + unsigned long jump_addr); I prefer change the order of jump_addr and first_ind_entry here. 
> + > +struct kimage; > +extern const unsigned char relocate_new_kernel[]; > +extern const size_t relocate_new_kernel_size; > +extern void kexec_reboot(void); > + > +#ifdef CONFIG_SMP > +extern atomic_t kexec_ready_to_reboot; > +extern const unsigned char kexec_smp_wait[]; > +#endif > + > +#endif /* !_ASM_KEXEC_H */ > diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile > index 7225916dd378..17dc8ce6b5ce 100644 > --- a/arch/loongarch/kernel/Makefile > +++ b/arch/loongarch/kernel/Makefile > @@ -17,6 +17,8 @@ obj-$(CONFIG_CPU_HAS_FPU) += fpu.o > obj-$(CONFIG_MODULES) += module.o module-sections.o > obj-$(CONFIG_STACKTRACE) += stacktrace.o > > +obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o > + > obj-$(CONFIG_PROC_FS) += proc.o > > obj-$(CONFIG_SMP) += smp.o > diff --git a/arch/loongarch/kernel/head.S b/arch/loongarch/kernel/head.S > index eb3f641d5915..0f786d670e66 100644 > --- a/arch/loongarch/kernel/head.S > +++ b/arch/loongarch/kernel/head.S > @@ -20,7 +20,12 @@ > > _head: > .word MZ_MAGIC/* "MZ", MS-DOS head
Re: [PATCH v2 2/3] LoongArch: Add kdump support
Hi, Youling, On Fri, Sep 9, 2022 at 11:20 AM Youling Tang wrote: > > This patch adds support for kdump, the kernel will reserve a region > for the crash kernel and jump there on panic. > > Arch-specific functions are added to allow for implementing a crash > dump file interface, /proc/vmcore, which can be viewed as a ELF file. > > A user space tool, like kexec-tools, is responsible for allocating a > separate region for the core's ELF header within crash kdump kernel > memory and filling it in when executing kexec_load(). > > Then, its location will be advertised to crash dump kernel via a new > device-tree property, "linux,elfcorehdr", and crash dump kernel preserves > the region for later use with fdt_reserve_elfcorehdr() at boot time. > > At the same time, it will also limit the crash kdump kernel to the > crashkernel area via a new device-tree property, "linux, usable-memory-range", > so as not to destroy the original kernel dump data. > > On crash dump kernel, /proc/vmcore will access the primary kernel's memory > with copy_oldmem_page(). 
> > I tested this on LoongArch 3A5000 machine and works as expected (Suggest > crashkernel parameter is "crashkernel=512M@2560M"), you may test it by > triggering a crash through /proc/sysrq_trigger: > > $ sudo kexec -p /boot/vmlinux-kdump --reuse-cmdline --append="nr_cpus=1" > # echo c > /proc/sysrq_trigger > > Signed-off-by: Youling Tang > --- > arch/loongarch/Kconfig | 22 ++ > arch/loongarch/Makefile | 4 ++ > arch/loongarch/kernel/Makefile | 1 + > arch/loongarch/kernel/crash_dump.c | 19 + > arch/loongarch/kernel/machine_kexec.c | 95 +++-- > arch/loongarch/kernel/mem.c | 6 ++ > arch/loongarch/kernel/relocate_kernel.S | 6 ++ > arch/loongarch/kernel/setup.c | 49 + > arch/loongarch/kernel/traps.c | 4 ++ > 9 files changed, 200 insertions(+), 6 deletions(-) > create mode 100644 arch/loongarch/kernel/crash_dump.c > > diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig > index 08e063aaf847..4bf888c1 100644 > --- a/arch/loongarch/Kconfig > +++ b/arch/loongarch/Kconfig > @@ -433,6 +433,28 @@ config KEXEC > > The name comes from the similarity to the exec system call. > > +config CRASH_DUMP > + bool "Build kdump crash kernel" > + help > + Generate crash dump after being started by kexec. This should > + be normally only set in special crash dump kernels which are > + loaded in the main kernel with kexec-tools into a specially > + reserved region and then later executed after a crash by > + kdump/kexec. > + > + For more details see Documentation/admin-guide/kdump/kdump.rst > + > +config PHYSICAL_START > + hex "Physical address where the kernel is loaded" > + default "0x9000a000" if 64BIT > + depends on CRASH_DUMP > + help > + This gives the XKPRANGE address where the kernel is loaded. > + If you plan to use kernel for capturing the crash dump change > + this value to start of the reserved region (the "X" value as > + specified in the "crashkernel=YM@XM" command line boot parameter > + passed to the panic-ed kernel). 
> + > config SECCOMP > bool "Enable seccomp to safely compute untrusted bytecode" > depends on PROC_FS > diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile > index 69b39ba3a09d..224274c1644e 100644 > --- a/arch/loongarch/Makefile > +++ b/arch/loongarch/Makefile > @@ -66,7 +66,11 @@ endif > cflags-y += -ffreestanding > cflags-y += $(call cc-option, -mno-check-zero-division) > > +ifdef CONFIG_PHYSICAL_START > +load-y = $(CONFIG_PHYSICAL_START) > +else > load-y = 0x9020 > +endif > bootvars-y = VMLINUX_LOAD_ADDRESS=$(load-y) > > drivers-$(CONFIG_PCI) += arch/loongarch/pci/ > diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile > index 17dc8ce6b5ce..79eee7db1414 100644 > --- a/arch/loongarch/kernel/Makefile > +++ b/arch/loongarch/kernel/Makefile > @@ -18,6 +18,7 @@ obj-$(CONFIG_MODULES) += module.o module-sections.o > obj-$(CONFIG_STACKTRACE) += stacktrace.o > > obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o > +obj-$(CONFIG_CRASH_DUMP)+= crash_dump.o > > obj-$(CONFIG_PROC_FS) += proc.o > > diff --git a/arch/loongarch/kernel/crash_dump.c > b/arch/loongarch/kernel/crash_dump.c > new file mode 100644 > index ..13e5d2f7870d > --- /dev/null > +++ b/arch/loongarch/kernel/crash_dump.c > @@ -0,0 +1,19 @@ > +// SPDX-License-Identifier: GPL-2.0 > +#include > +#include > +#include > + > +ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn, > +size_t csize, unsigned long offset) > +{ > + void *vaddr; > + > + if (!csize) > + return 0; > + > + vaddr = k