Add support in the kernel to process the crash'ed kernel's memory preserved during MPIPL and export it as /proc/vmcore file for the userland scripts to filter and analyze it later.
Signed-off-by: Hari Bathini <hbath...@linux.ibm.com> --- arch/powerpc/platforms/powernv/opal-fadump.c | 190 ++++++++++++++++++++++++++ 1 file changed, 187 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/platforms/powernv/opal-fadump.c b/arch/powerpc/platforms/powernv/opal-fadump.c index 9c68c83..dffc0e7 100644 --- a/arch/powerpc/platforms/powernv/opal-fadump.c +++ b/arch/powerpc/platforms/powernv/opal-fadump.c @@ -18,6 +18,7 @@ #include <linux/of_fdt.h> #include <linux/libfdt.h> #include <linux/mm.h> +#include <linux/crash_dump.h> #include <asm/page.h> #include <asm/opal.h> @@ -25,6 +26,7 @@ #include "../../kernel/fadump-common.h" #include "opal-fadump.h" +static const struct opal_fadump_mem_struct *opal_fdm_active; static struct opal_fadump_mem_struct *opal_fdm; static void opal_fadump_update_config(struct fw_dump *fadump_conf, @@ -41,6 +43,50 @@ static void opal_fadump_update_config(struct fw_dump *fadump_conf, fadump_conf->boot_mem_dest_addr); fadump_conf->fadumphdr_addr = fdm->fadumphdr_addr; + + /* Start address of preserve area (permanent reservation) */ + fadump_conf->preserv_area_start = fadump_conf->boot_mem_dest_addr; + pr_debug("Preserve area start address: 0x%lx\n", + fadump_conf->preserv_area_start); +} + +/* + * This function is called in the capture kernel to get configuration details + * from metadata setup by the first kernel. + */ +static void opal_fadump_get_config(struct fw_dump *fadump_conf, + const struct opal_fadump_mem_struct *fdm) +{ + unsigned long base, size, last_end, hole_size; + int i; + + if (!fadump_conf->dump_active) + return; + + last_end = 0; + hole_size = 0; + fadump_conf->boot_memory_size = 0; + + if (fdm->region_cnt) + pr_debug("Boot memory regions:\n"); + + for (i = 0; i < fdm->region_cnt; i++) { + base = fdm->rgn[i].src; + size = fdm->rgn[i].size; + pr_debug("\t%d. base: 0x%lx, size: 0x%lx\n", + (i + 1), base, size); + + fadump_conf->boot_mem_addr[i] = base; + fadump_conf->boot_mem_size[i] = size; + fadump_conf->boot_memory_size += size; + hole_size += (base - last_end); + + last_end = base + size; + } + + fadump_conf->boot_mem_top = (fadump_conf->boot_memory_size + hole_size); + fadump_conf->boot_mem_regs_cnt = fdm->region_cnt; + opal_fadump_update_config(fadump_conf, fdm); } static ulong opal_fadump_init_mem_struct(struct fw_dump *fadump_conf) @@ -174,27 +220,127 @@ static int opal_fadump_unregister_fadump(struct fw_dump *fadump_conf) static int opal_fadump_invalidate_fadump(struct fw_dump *fadump_conf) { - return -EIO; + s64 rc; + + rc = opal_mpipl_update(OPAL_MPIPL_FREE_PRESERVED_MEMORY, 0, 0, 0); + if (rc) { + pr_err("Failed to invalidate - unexpected Error(%lld).\n", rc); + return -EIO; + } + + fadump_conf->dump_active = 0; + opal_fdm_active = NULL; + return 0; +} + +/* + * Convert CPU state data saved at the time of crash into ELF notes. + */ +static int __init opal_fadump_build_cpu_notes(struct fw_dump *fadump_conf) +{ + u32 num_cpus, *note_buf; + struct fadump_crash_info_header *fdh = NULL; + + num_cpus = 1; + /* Allocate buffer to hold cpu crash notes. */ + fadump_conf->cpu_notes_buf_size = num_cpus * sizeof(note_buf_t); + fadump_conf->cpu_notes_buf_size = + PAGE_ALIGN(fadump_conf->cpu_notes_buf_size); + note_buf = fadump_cpu_notes_buf_alloc(fadump_conf->cpu_notes_buf_size); + if (!note_buf) { + pr_err("Failed to allocate 0x%lx bytes for cpu notes buffer\n", + fadump_conf->cpu_notes_buf_size); + return -ENOMEM; + } + fadump_conf->cpu_notes_buf = __pa(note_buf); + + pr_debug("Allocated buffer for cpu notes of size %ld at %p\n", + (num_cpus * sizeof(note_buf_t)), note_buf); + + if (fadump_conf->fadumphdr_addr) + fdh = __va(fadump_conf->fadumphdr_addr); + + if (fdh && (fdh->crashing_cpu != FADUMP_CPU_UNKNOWN)) { + note_buf = fadump_regs_to_elf_notes(note_buf, &(fdh->regs)); + final_note(note_buf); + + pr_debug("Updating elfcore header (%llx) with cpu notes\n", + fdh->elfcorehdr_addr); + fadump_update_elfcore_header(fadump_conf, + __va(fdh->elfcorehdr_addr)); + } + + return 0; } static int __init opal_fadump_process_fadump(struct fw_dump *fadump_conf) { - return -EINVAL; + struct fadump_crash_info_header *fdh; + int rc = 0; + + if (!opal_fdm_active || !fadump_conf->fadumphdr_addr) + return -EINVAL; + + /* Validate the fadump crash info header */ + fdh = __va(fadump_conf->fadumphdr_addr); + if (fdh->magic_number != FADUMP_CRASH_INFO_MAGIC) { + pr_err("Crash info header is not valid.\n"); + return -EINVAL; + } + + /* + * TODO: To build cpu notes, find a way to map PIR to logical id. + * Also, we may need different method for pseries and powernv. + * The currently booted kernel could have a different PIR to + * logical id mapping. So, try saving info of previous kernel's + * paca to get the right PIR to logical id mapping. + */ + rc = opal_fadump_build_cpu_notes(fadump_conf); + if (rc) + return rc; + + /* + * We are done validating dump info and elfcore header is now ready + * to be exported. set elfcorehdr_addr so that vmcore module will + * export the elfcore header through '/proc/vmcore'. + */ + elfcorehdr_addr = fdh->elfcorehdr_addr; + + return rc; } static void opal_fadump_region_show(struct fw_dump *fadump_conf, struct seq_file *m) { int i; - const struct opal_fadump_mem_struct *fdm_ptr = opal_fdm; + const struct opal_fadump_mem_struct *fdm_ptr; u64 dumped_bytes = 0; + if (fadump_conf->dump_active) + fdm_ptr = opal_fdm_active; + else + fdm_ptr = opal_fdm; + for (i = 0; i < fdm_ptr->region_cnt; i++) { + /* + * Only regions that are registered for MPIPL + * would have dump data. + */ + if ((fadump_conf->dump_active) && + (i < fdm_ptr->registered_regions)) + dumped_bytes = fdm_ptr->rgn[i].size; + seq_printf(m, "DUMP: Src: %#016llx, Dest: %#016llx, ", fdm_ptr->rgn[i].src, fdm_ptr->rgn[i].dest); seq_printf(m, "Size: %#llx, Dumped: %#llx bytes\n", fdm_ptr->rgn[i].size, dumped_bytes); } + + /* Dump is active. Show reserved area start address. */ + if (fadump_conf->dump_active) { + seq_printf(m, "\nMemory above %#016lx is reserved for saving crash dump\n", + fadump_conf->reserve_dump_area_start); + } } static void opal_fadump_trigger(struct fadump_crash_info_header *fdh, @@ -225,6 +371,7 @@ static struct fadump_ops opal_fadump_ops = { int __init opal_fadump_dt_scan(struct fw_dump *fadump_conf, ulong node) { unsigned long dn; + const __be32 *prop; /* * Check if Firmware-Assisted Dump is supported. if yes, check @@ -251,5 +398,42 @@ int __init opal_fadump_dt_scan(struct fw_dump *fadump_conf, ulong node) */ fadump_conf->max_copy_size = _ALIGN_DOWN(U32_MAX, PAGE_SIZE); + /* + * Check if dump has been initiated on last reboot. + */ + prop = of_get_flat_dt_prop(dn, "mpipl-boot", NULL); + if (prop) { + u64 addr = 0; + s64 ret; + const struct opal_fadump_mem_struct *r_opal_fdm_active; + + ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_KERNEL, &addr); + if ((ret != OPAL_SUCCESS) || !addr) { + pr_err("Failed to get Kernel metadata (%lld)\n", ret); + return 1; + } + + addr = be64_to_cpu(addr); + pr_debug("Kernel metadata addr: %llx\n", addr); + + opal_fdm_active = __va(addr); + r_opal_fdm_active = (void *)addr; + if (r_opal_fdm_active->version != OPAL_FADUMP_VERSION) { + pr_err("FADump active but version (%u) unsupported!\n", + r_opal_fdm_active->version); + return 1; + } + + /* Kernel regions not registered with f/w for MPIPL */ + if (r_opal_fdm_active->registered_regions == 0) { + opal_fdm_active = NULL; + return 1; + } + + pr_info("Firmware-assisted dump is active.\n"); + fadump_conf->dump_active = 1; + opal_fadump_get_config(fadump_conf, r_opal_fdm_active); + } + return 1; }