On Friday 05 September 2014 01:34 PM, Alexander Graf wrote: > > > On 04.09.14 13:13, Aravinda Prasad wrote: >> Whenever there is a physical memory error due to bit >> flips, which cannot be corrected by hardware, the error >> is passed on to the kernel. If the memory address in >> error belongs to guest address space then guest kernel >> is responsible to take action. Hence the error is passed >> on to guest via KVM by invoking 0x200 NMI vector. >> >> However, guest OS, as per PAPR, expects an error log >> upon such error. This patch registers a new hcall >> which is issued from 0x200 interrupt vector and builds >> the error log, copies the error log to rtas space and >> passes the address of the error log to guest >> >> Enhancement to KVM to perform above functionality is >> already in upstream kernel. >> >> Signed-off-by: Aravinda Prasad <aravi...@linux.vnet.ibm.com> >> --- >> hw/ppc/spapr_hcall.c | 154 >> ++++++++++++++++++++++++++++++++++++++++++++++++ >> include/hw/ppc/spapr.h | 4 + >> 2 files changed, 157 insertions(+), 1 deletion(-) >> >> diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c >> index 01650ba..c3aa448 100644 >> --- a/hw/ppc/spapr_hcall.c >> +++ b/hw/ppc/spapr_hcall.c >> @@ -14,6 +14,88 @@ struct SPRSyncState { >> target_ulong mask; >> }; >> >> +/* Offset from rtas-base where error log is placed */ >> +#define RTAS_ERROR_OFFSET (TARGET_PAGE_SIZE) > > You can't assume this. Please compute the value at the same place you > compute the rtas-size.
sure > >> + >> +#define RTAS_ELOG_SEVERITY_SHIFT 0x5 >> +#define RTAS_ELOG_DISPOSITION_SHIFT 0x3 >> +#define RTAS_ELOG_INITIATOR_SHIFT 0x4 >> + >> +/* >> + * Only required RTAS event severity, disposition, initiator >> + * target and type are copied from arch/powerpc/include/asm/rtas.h >> + */ >> + >> +/* RTAS event severity */ >> +#define RTAS_SEVERITY_ERROR_SYNC 0x3 >> + >> +/* RTAS event disposition */ >> +#define RTAS_DISP_NOT_RECOVERED 0x2 >> + >> +/* RTAS event initiator */ >> +#define RTAS_INITIATOR_MEMORY 0x4 >> + >> +/* RTAS event target */ >> +#define RTAS_TARGET_MEMORY 0x4 >> + >> +/* RTAS event type */ >> +#define RTAS_TYPE_ECC_UNCORR 0x09 >> + >> +/* >> + * Currently KVM only passes on the uncorrected machine >> + * check memory error to guest. Other machine check errors >> + * such as SLB multi-hit and TLB multi-hit are recovered >> + * in KVM and are not passed on to guest. >> + * >> + * DSISR Bit for uncorrected machine check error. Based >> + * on arch/powerpc/include/asm/mce.h > > Please don't include Linux code. This file is GPLv2+ licensed and I > don't want to taint it as GPLv2 only just for the sake of mce. Hmm.. ok. In that case I should not copy rtas_error_log structure below from kernel source as well. > >> + */ >> +#define PPC_BIT(bit) (0x8000000000000000ULL >> bit) >> +#define P7_DSISR_MC_UE (PPC_BIT(48)) /* P8 too */ >> + >> +/* Adopted from kernel source arch/powerpc/include/asm/rtas.h */ >> +struct rtas_error_log { >> + /* Byte 0 */ >> + uint8_t byte0; /* Architectural version */ >> + >> + /* Byte 1 */ >> + uint8_t byte1; >> + /* XXXXXXXX >> + * XXX 3: Severity level of error >> + * XX 2: Degree of recovery >> + * X 1: Extended log present? >> + * XX 2: Reserved >> + */ >> + >> + /* Byte 2 */ >> + uint8_t byte2; >> + /* XXXXXXXX >> + * XXXX 4: Initiator of event >> + * XXXX 4: Target of failed operation >> + */ >> + uint8_t byte3; /* General event or error*/ >> +}; >> + >> +/* >> + * Data format in RTAS-Blob >> + * >> + * This structure contains error information related to Machine >> + * Check exception. This is filled up and copied to rtas-blob >> + * upon machine check exception. >> + */ >> +struct rtas_mc_log { >> + target_ulong srr0; >> + target_ulong srr1; >> + /* >> + * Beginning of error log address. This is properly >> + * populated and passed on to OS registered machine >> + * check notification routine upon machine check >> + * exception >> + */ >> + target_ulong r3; >> + struct rtas_error_log err_log; >> +}; >> + >> static void do_spr_sync(void *arg) >> { >> struct SPRSyncState *s = arg; >> @@ -586,6 +668,77 @@ static target_ulong h_rtas_update(PowerPCCPU *cpu, >> sPAPREnvironment *spapr, >> return 0; >> } >> >> +static target_ulong h_report_mc_err(PowerPCCPU *cpu, sPAPREnvironment >> *spapr, >> + target_ulong opcode, target_ulong *args) >> +{ >> + struct rtas_mc_log mc_log; >> + CPUPPCState *env = &cpu->env; >> + PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu); >> + >> + /* >> + * We save the original r3 register in SPRG2 in 0x200 vector, >> + * which is patched during call to ibm.nmi-register. Original >> + * r3 is required to be included in error log >> + */ >> + mc_log.r3 = env->spr[SPR_SPRG2]; > > Don't you have to call cpu_synchronize_registers() before you access > SPRG2? Otherwise the value may be stale. Yes true. Will include. > >> + >> + /* >> + * SRR0 and SRR1, containing nip and msr at the time of exception, >> + * are clobbered when we return from this hcall. Hence they >> + * need to be properly saved and restored. We save srr0 >> + * and srr1 in rtas blob and restore it in 0x200 vector >> + * before branching to OS registered machine check handler >> + */ >> + mc_log.srr0 = env->spr[SPR_SRR0]; >> + mc_log.srr1 = env->spr[SPR_SRR1]; >> + >> + /* Set error log fields */ >> + mc_log.err_log.byte0 = 0x00; >> + mc_log.err_log.byte1 = >> + (RTAS_SEVERITY_ERROR_SYNC << RTAS_ELOG_SEVERITY_SHIFT); >> + mc_log.err_log.byte1 |= >> + (RTAS_DISP_NOT_RECOVERED << RTAS_ELOG_DISPOSITION_SHIFT); >> + mc_log.err_log.byte2 = >> + (RTAS_INITIATOR_MEMORY << RTAS_ELOG_INITIATOR_SHIFT); >> + mc_log.err_log.byte2 |= RTAS_TARGET_MEMORY; >> + >> + if (env->spr[SPR_DSISR] & P7_DSISR_MC_UE) { >> + mc_log.err_log.byte3 = RTAS_TYPE_ECC_UNCORR; >> + } else { >> + mc_log.err_log.byte3 = 0x0; >> + } >> + >> + /* Handle all Host/Guest LE/BE combinations */ >> + if ((*pcc->interrupts_big_endian)(cpu)) { > > This should check MSR.LE, not ILE. I did not quite get you. Did you mean to say I should check LE bit in MSR instead of invoking pcc->interrupts_big_endian? If so I should also change that check in 4/4 where I patch 0x200. Regards, Aravinda > > > Alex > -- Regards, Aravinda