The logic for preserving the CPU registers and memory regions was implemented in previous patches.
Write that data to the relevant memory addresses: PROC_DUMP_AREA for the CPU registers, and MDRT for the preserved memory regions. Also export the "mpipl-boot" device tree property (under /ibm,opal/dump), so that the kernel knows it's a 'dump active' boot. Signed-off-by: Aditya Gupta <[email protected]> --- hw/ppc/pnv.c | 31 ++++++++++++- hw/ppc/pnv_mpipl.c | 103 +++++++++++++++++++++++++++++++++++++++++++ include/hw/ppc/pnv.h | 1 + 3 files changed, 134 insertions(+), 1 deletion(-) diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c index 643558f374e9..7c36f3a00e90 100644 --- a/hw/ppc/pnv.c +++ b/hw/ppc/pnv.c @@ -750,6 +750,7 @@ static void pnv_reset(MachineState *machine, ResetType type) PnvMachineState *pnv = PNV_MACHINE(machine); IPMIBmc *bmc; void *fdt; + int node_offset; qemu_devices_reset(type); @@ -780,7 +781,35 @@ static void pnv_reset(MachineState *machine, ResetType type) _FDT((fdt_pack(fdt))); } - if (!pnv->mpipl_state.is_next_boot_mpipl) { + /* + * If it's a MPIPL boot, add the "mpipl-boot" property, and reset the + * boolean for MPIPL boot for next boot + */ + if (pnv->mpipl_state.is_next_boot_mpipl) { + void *fdt_copy = g_malloc0(FDT_MAX_SIZE); + + /* Write the preserved MDRT and CPU State Data */ + do_mpipl_write(pnv); + + /* Create a writable copy of the fdt */ + _FDT((fdt_open_into(fdt, fdt_copy, FDT_MAX_SIZE))); + + node_offset = fdt_path_offset(fdt_copy, "/ibm,opal/dump"); + _FDT((fdt_appendprop_u64(fdt_copy, node_offset, "mpipl-boot", 1))); + + /* Update the fdt, and free the original fdt */ + if (fdt != machine->fdt) { + /* + * Only free the fdt if it's not machine->fdt, to prevent + * double free, since we already free machine->fdt later + */ + g_free(fdt); + } + fdt = fdt_copy; + + /* This boot is an MPIPL, reset the boolean for next boot */ + pnv->mpipl_state.is_next_boot_mpipl = false; + } else { /* * Set the "Thread Register State Entry Size", so that firmware can * allocate enough memory to capture CPU state in the event of a diff --git a/hw/ppc/pnv_mpipl.c b/hw/ppc/pnv_mpipl.c 
index 8b41938c2e87..3c9755a6c440 100644 --- a/hw/ppc/pnv_mpipl.c +++ b/hw/ppc/pnv_mpipl.c @@ -19,6 +19,8 @@ (pnv->mpipl_state.skiboot_base + MDST_TABLE_OFF) #define MDDT_TABLE_RELOCATED \ (pnv->mpipl_state.skiboot_base + MDDT_TABLE_OFF) +#define MDRT_TABLE_RELOCATED \ + (pnv->mpipl_state.skiboot_base + MDRT_TABLE_OFF) #define PROC_DUMP_RELOCATED \ (pnv->mpipl_state.skiboot_base + PROC_DUMP_AREA_OFF) @@ -263,6 +265,100 @@ static void pnv_mpipl_preserve_cpu_state(PnvMachineState *pnv) } } +static void pnv_mpipl_write_cpu_state(PnvMachineState *pnv) +{ + MpiplProcDumpArea *proc_area = &pnv->mpipl_state.proc_area; + MpiplPreservedCPUState *cpu_state = pnv->mpipl_state.cpu_states; + const uint32_t num_cpu_states = pnv->mpipl_state.num_cpu_states; + hwaddr next_regentries_hdr; + + if (be32_to_cpu(proc_area->alloc_size) < + (num_cpu_states * sizeof(MpiplPreservedCPUState))) { + qemu_log_mask(LOG_GUEST_ERROR, + "MPIPL: Size of buffer allocate by skiboot (%u bytes) is not" + "enough to save all CPUs registers needed (%ld bytes)", + be32_to_cpu(proc_area->alloc_size), + num_cpu_states * sizeof(MpiplPreservedCPUState)); + + return; + } + + proc_area->version = PROC_DUMP_AREA_VERSION_P9; + + /* + * This is the stride kernel/firmware should use to jump from a + * register entries header to next CPU's header + */ + proc_area->thread_size = cpu_to_be32(sizeof(MpiplPreservedCPUState)); + + /* Write the header and register entries for each CPU */ + next_regentries_hdr = be64_to_cpu(proc_area->alloc_addr) & (~HRMOR_BIT); + for (int i = 0; i < num_cpu_states; ++i) { + cpu_physical_memory_write(next_regentries_hdr, &cpu_state->hdr, + sizeof(MpiplRegDataHdr)); + + cpu_physical_memory_write(next_regentries_hdr + sizeof(MpiplRegDataHdr), + &cpu_state->reg_entries, + NUM_REGS_PER_CPU * sizeof(MpiplRegEntry)); + + /* + * According to HDAT section: "15.3.1.5 Architected Register Data content": + * + * The next register entries header will be at current header + + * "Thread Register State 
Entry size" + * + * Note: proc_area.thread_size == sizeof(MpiplPreservedCPUState) + */ + next_regentries_hdr += sizeof(MpiplPreservedCPUState); + ++cpu_state; + } + + /* Point the destination address to the preserved memory region */ + proc_area->dest_addr = proc_area->alloc_addr; + proc_area->act_size = cpu_to_be32(num_cpu_states * + sizeof(MpiplPreservedCPUState)); + + cpu_physical_memory_write(PROC_DUMP_AREA_OFF, proc_area, + sizeof(MpiplProcDumpArea)); +} + +static void pnv_mpipl_write_mdrt(PnvMachineState *pnv) +{ + MpiplPreservedState *state = &pnv->mpipl_state; + AddressSpace *default_as = &address_space_memory; + MemTxResult io_result; + MemTxAttrs attrs; + + /* Mark the memory transactions as privileged memory access */ + attrs.user = 0; + attrs.memory = 1; + + /* + * Generally writes from platform during MPIPL don't go to a relocated + * skiboot address + * + * Though for MDRT we are doing so, as this is the address skiboot + * considers by default for MDRT + * + * MDRT/MDST/MDDT base addresses are actually meant to be shared by + * platform in SPIRA structures. + * + * Not implementing SPIRA as it increases complexity for no gains. 
+ * Using the default address skiboot expects for MDRT, which is the + * relocated MDRT, hence writing to it + * + * Other tables like MDST/MDDT should not be written to relocated + * addresses, as skiboot will overwrite anything from SKIBOOT_BASE till + * SKIBOOT_BASE+SKIBOOT_SIZE (which is 0x30000000-0x31c00000 by default) + */ + io_result = address_space_write(default_as, MDRT_TABLE_RELOCATED, attrs, + state->mdrt_table, + state->num_mdrt_entries * sizeof(MdrtTableEntry)); + if (io_result != MEMTX_OK) { + qemu_log_mask(LOG_GUEST_ERROR, "MPIPL: Failed to write MDRT table\n"); + } +} + void do_mpipl_preserve(PnvMachineState *pnv) { pause_all_vcpus(); @@ -283,3 +379,10 @@ void do_mpipl_preserve(PnvMachineState *pnv) */ qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } + +void do_mpipl_write(PnvMachineState *pnv) +{ + pnv_mpipl_write_mdrt(pnv); + pnv_mpipl_write_cpu_state(pnv); +} + diff --git a/include/hw/ppc/pnv.h b/include/hw/ppc/pnv.h index 02baa0012460..a71e968c32e0 100644 --- a/include/hw/ppc/pnv.h +++ b/include/hw/ppc/pnv.h @@ -295,5 +295,6 @@ void pnv_bmc_set_pnor(IPMIBmc *bmc, PnvPnor *pnor); /* MPIPL helpers */ void do_mpipl_preserve(PnvMachineState *pnv); +void do_mpipl_write(PnvMachineState *pnv); #endif /* PPC_PNV_H */ -- 2.52.0
