This patch adds FWNMI support in qemu for powerKVM
guests by handling the ibm,nmi-register rtas call.
Whenever OS issues ibm,nmi-register RTAS call, the
machine check notification address is saved and the
machine check interrupt vector 0x200 is patched to
issue a private hcall.

This patch also handles the cases when multi-processors
experience machine check at or about the same time.
As per PAPR, subsequent processors serialize waiting
for the first processor to issue the ibm,nmi-interlock call.
The second processor retries if the first processor which
received a machine check is still reading the error log
and is yet to issue ibm,nmi-interlock call.

Signed-off-by: Aravinda Prasad <aravi...@linux.vnet.ibm.com>
---
 hw/ppc/spapr_hcall.c   |   16 +++++++
 hw/ppc/spapr_rtas.c    |  116 ++++++++++++++++++++++++++++++++++++++++++++++++
 include/hw/ppc/spapr.h |    8 +++
 3 files changed, 139 insertions(+), 1 deletion(-)

diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index c3aa448..380e75c 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -96,6 +96,9 @@ struct rtas_mc_log {
     struct rtas_error_log err_log;
 };
 
+/* Whether machine check handling is in progress by any CPU */
+bool mc_in_progress;
+
 static void do_spr_sync(void *arg)
 {
     struct SPRSyncState *s = arg;
@@ -676,6 +679,19 @@ static target_ulong h_report_mc_err(PowerPCCPU *cpu, 
sPAPREnvironment *spapr,
     PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
 
     /*
+     * Only one VCPU can process machine check NMI at a time. Hence
+     * set the lock mc_in_progress. Once the VCPU finishes processing
+     * NMI, it executes ibm,nmi-interlock and mc_in_progress is unset
+     * in ibm,nmi-interlock handler. Meanwhile if other VCPUs encounter
+     * NMI we return 0 asking the VCPU to retry h_report_mc_err
+     */
+    if (mc_in_progress == 1) {
+        return 0;
+    }
+
+    mc_in_progress = 1;
+
+    /*
      * We save the original r3 register in SPRG2 in 0x200 vector,
      * which is patched during call to ibm.nmi-register. Original
      * r3 is required to be included in error log
diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
index 2ec2a8e..dfb8ee1 100644
--- a/hw/ppc/spapr_rtas.c
+++ b/hw/ppc/spapr_rtas.c
@@ -36,6 +36,9 @@
 
 #include <libfdt.h>
 
+#define BRANCH_INST_MASK  0xFC000000
+extern bool mc_in_progress;
+
 static void rtas_display_character(PowerPCCPU *cpu, sPAPREnvironment *spapr,
                                    uint32_t token, uint32_t nargs,
                                    target_ulong args,
@@ -290,6 +293,113 @@ static void rtas_ibm_os_term(PowerPCCPU *cpu,
     rtas_st(rets, 0, ret);
 }
 
+static void rtas_ibm_nmi_register(PowerPCCPU *cpu,
+                                  sPAPREnvironment *spapr,
+                                  uint32_t token, uint32_t nargs,
+                                  target_ulong args,
+                                  uint32_t nret, target_ulong rets)
+{
+    int i;
+    uint32_t ori_inst = 0x60630000;
+    uint32_t branch_inst = 0x48000002;
+    target_ulong guest_machine_check_addr;
+    PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
+    /*
+     * Trampoline saves r3 in sprg2 and issues private hcall
+     * to request qemu to build error log. QEMU builds the
+     * error log, copies to rtas-blob and returns the address.
+     * The initial 16 bytes in rtas-blob consists of saved srr0
+     * and srr1 which we restore and pass on the actual error
+     * log address to OS handled mcachine check notification
+     * routine
+     */
+    uint32_t trampoline[] = {
+        0x7c7243a6,    /* mtspr   SPRN_SPRG2,r3 */
+        0x38600000,    /* li      r3,0   */
+        0x60630000,    /* ori     r3,r3,KVMPPC_H_REPORT_MC_ERR
+                        * The KVMPPC_H_REPORT_MC_ERR value is
+                        * patched below */
+                       /* Issue H_CALL */
+        0x44000022,    /*  sc      1     */
+        0x2fa30000,    /* cmplwi r3,0 */
+        0x409e0008,    /* bne continue */
+        0x4800020a,    /* retry KVMPPC_H_REPORT_MC_ERR */
+                       /* KVMPPC_H_REPORT_MC_ERR restores the SPRG2,
+                        * hence we are free to clobber it */
+        0x7c9243a6,    /* mtspr r4 sprg2 */
+        0xe8830000,    /* ld r4, 0(r3) */
+        0x7c9a03a6,    /* mtspr r4, srr0 */
+        0xe8830008,    /* ld r4, 8(r3) */
+        0x7c9b03a6,    /* mtspr r4, srr1 */
+        0x38630010,    /* addi r3,r3,16 */
+        0x7c9242a6,    /* mfspr r4 sprg2 */
+        0x48000002,    /* Branch to address registered
+                        * by OS. The branch address is
+                        * patched below */
+        0x48000000,    /* b . */
+    };
+    int total_inst = sizeof(trampoline) / sizeof(uint32_t);
+
+    /* Store the system reset and machine check address */
+    guest_machine_check_addr = rtas_ld(args, 1);
+
+    /* Safety Check */
+    QEMU_BUILD_BUG_ON(sizeof(trampoline) > MC_INTERRUPT_VECTOR_SIZE);
+
+    /* Update the KVMPPC_H_REPORT_MC_ERR value in trampoline */
+    ori_inst |= KVMPPC_H_REPORT_MC_ERR;
+    memcpy(&trampoline[2], &ori_inst, sizeof(ori_inst));
+
+    /*
+     * Sanity check guest_machine_check_addr to prevent clobbering
+     * operator value in branch instruction
+     */
+    if (guest_machine_check_addr & BRANCH_INST_MASK) {
+        fprintf(stderr, "Unable to register ibm,nmi_register: "
+                "Invalid machine check handler address\n");
+        rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
+        return;
+    }
+
+    /*
+     * Update the branch instruction in trampoline
+     * with the absolute machine check address requested by OS.
+     */
+    branch_inst |= guest_machine_check_addr;
+    memcpy(&trampoline[14], &branch_inst, sizeof(branch_inst));
+
+    /* Handle all Host/Guest LE/BE combinations */
+    if ((*pcc->interrupts_big_endian)(cpu)) {
+        for (i = 0; i < total_inst; i++) {
+            trampoline[i] = cpu_to_be32(trampoline[i]);
+        }
+    } else {
+        for (i = 0; i < total_inst; i++) {
+            trampoline[i] = cpu_to_le32(trampoline[i]);
+        }
+    }
+
+    /* Patch 0x200 NMI interrupt vector memory area of guest */
+    cpu_physical_memory_write(MC_INTERRUPT_VECTOR, trampoline,
+                              sizeof(trampoline));
+
+    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
+}
+
+static void rtas_ibm_nmi_interlock(PowerPCCPU *cpu,
+                                   sPAPREnvironment *spapr,
+                                   uint32_t token, uint32_t nargs,
+                                   target_ulong args,
+                                   uint32_t nret, target_ulong rets)
+{
+    /*
+     * VCPU issuing ibm,nmi-interlock is done with NMI handling,
+     * hence unset mc_in_progress.
+     */
+    mc_in_progress = 0;
+    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
+}
+
 static struct rtas_call {
     const char *name;
     spapr_rtas_fn fn;
@@ -419,6 +529,12 @@ static void core_rtas_register_types(void)
                         rtas_ibm_set_system_parameter);
     spapr_rtas_register(RTAS_IBM_OS_TERM, "ibm,os-term",
                         rtas_ibm_os_term);
+    spapr_rtas_register(RTAS_IBM_NMI_REGISTER,
+                        "ibm,nmi-register",
+                        rtas_ibm_nmi_register);
+    spapr_rtas_register(RTAS_IBM_NMI_INTERLOCK,
+                        "ibm,nmi-interlock",
+                        rtas_ibm_nmi_interlock);
 }
 
 type_init(core_rtas_register_types)
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 74cf881..bbb2747 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -384,8 +384,10 @@ int spapr_allocate_irq_block(int num, bool lsi, bool msi);
 #define RTAS_GET_SENSOR_STATE                   (RTAS_TOKEN_BASE + 0x1D)
 #define RTAS_IBM_CONFIGURE_CONNECTOR            (RTAS_TOKEN_BASE + 0x1E)
 #define RTAS_IBM_OS_TERM                        (RTAS_TOKEN_BASE + 0x1F)
+#define RTAS_IBM_NMI_REGISTER                   (RTAS_TOKEN_BASE + 0x20)
+#define RTAS_IBM_NMI_INTERLOCK                  (RTAS_TOKEN_BASE + 0x21)
 
-#define RTAS_TOKEN_MAX                          (RTAS_TOKEN_BASE + 0x20)
+#define RTAS_TOKEN_MAX                          (RTAS_TOKEN_BASE + 0x22)
 
 /* RTAS ibm,get-system-parameter token values */
 #define RTAS_SYSPARM_SPLPAR_CHARACTERISTICS      20
@@ -483,4 +485,8 @@ int spapr_dma_dt(void *fdt, int node_off, const char 
*propname,
 int spapr_tcet_dma_dt(void *fdt, int node_off, const char *propname,
                       sPAPRTCETable *tcet);
 
+/* Machine Check Interrupt related macros */
+#define MC_INTERRUPT_VECTOR           0x200
+#define MC_INTERRUPT_VECTOR_SIZE      0x100
+
 #endif /* !defined (__HW_SPAPR_H__) */


Reply via email to