From: Hari Bathini <hbath...@linux.vnet.ibm.com>

Firmware provides architected register state data at the time of crash.
Process this data and build CPU notes to append to ELF core.

Signed-off-by: Hari Bathini <hbath...@linux.vnet.ibm.com>
Signed-off-by: Vasant Hegde <hegdevas...@linux.vnet.ibm.com>
---
 arch/powerpc/kernel/fadump-common.h          |    4 +
 arch/powerpc/platforms/powernv/opal-fadump.c |  197 ++++++++++++++++++++++++--
 arch/powerpc/platforms/powernv/opal-fadump.h |   39 +++++
 3 files changed, 228 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/kernel/fadump-common.h 
b/arch/powerpc/kernel/fadump-common.h
index 968745a..2dd0d9d 100644
--- a/arch/powerpc/kernel/fadump-common.h
+++ b/arch/powerpc/kernel/fadump-common.h
@@ -121,7 +121,11 @@ struct fw_dump {
         */
        unsigned long   preserv_area_start;
 
+       unsigned long   cpu_state_destination_addr;
+       unsigned long   cpu_state_data_version;
+       unsigned long   cpu_state_entry_size;
        unsigned long   cpu_state_data_size;
+
        unsigned long   hpte_region_size;
 
        unsigned long   boot_memory_size;
diff --git a/arch/powerpc/platforms/powernv/opal-fadump.c 
b/arch/powerpc/platforms/powernv/opal-fadump.c
index dffc0e7..479967c 100644
--- a/arch/powerpc/platforms/powernv/opal-fadump.c
+++ b/arch/powerpc/platforms/powernv/opal-fadump.c
@@ -27,6 +27,7 @@
 #include "opal-fadump.h"
 
 static const struct opal_fadump_mem_struct *opal_fdm_active;
+static const struct opal_mpipl_fadump *opal_cpu_metadata;
 static struct opal_fadump_mem_struct *opal_fdm;
 
 static void opal_fadump_update_config(struct fw_dump *fadump_conf,
@@ -233,15 +234,115 @@ static int opal_fadump_invalidate_fadump(struct fw_dump 
*fadump_conf)
        return 0;
 }
 
+static inline void opal_fadump_set_regval_regnum(struct pt_regs *regs,
+                                                u32 reg_type, u32 reg_num,
+                                                u64 reg_val)
+{
+       if (reg_type == HDAT_FADUMP_REG_TYPE_GPR) {
+               if (reg_num < 32)
+                       regs->gpr[reg_num] = reg_val;
+               return;
+       }
+
+       switch (reg_num) {
+       case SPRN_CTR:
+               regs->ctr = reg_val;
+               break;
+       case SPRN_LR:
+               regs->link = reg_val;
+               break;
+       case SPRN_XER:
+               regs->xer = reg_val;
+               break;
+       case SPRN_DAR:
+               regs->dar = reg_val;
+               break;
+       case SPRN_DSISR:
+               regs->dsisr = reg_val;
+               break;
+       case HDAT_FADUMP_REG_ID_NIP:
+               regs->nip = reg_val;
+               break;
+       case HDAT_FADUMP_REG_ID_MSR:
+               regs->msr = reg_val;
+               break;
+       case HDAT_FADUMP_REG_ID_CCR:
+               regs->ccr = reg_val;
+               break;
+       }
+}
+
+static inline void opal_fadump_read_regs(char *bufp, unsigned int regs_cnt,
+                                        unsigned int reg_entry_size,
+                                        struct pt_regs *regs)
+{
+       int i;
+       struct hdat_fadump_reg_entry *reg_entry;
+
+       memset(regs, 0, sizeof(struct pt_regs));
+
+       for (i = 0; i < regs_cnt; i++, bufp += reg_entry_size) {
+               reg_entry = (struct hdat_fadump_reg_entry *)bufp;
+               opal_fadump_set_regval_regnum(regs,
+                                             be32_to_cpu(reg_entry->reg_type),
+                                             be32_to_cpu(reg_entry->reg_num),
+                                             be64_to_cpu(reg_entry->reg_val));
+       }
+}
+
+static inline bool __init is_thread_core_inactive(u8 core_state)
+{
+       bool is_inactive = false;
+
+       if (core_state == HDAT_FADUMP_CORE_INACTIVE)
+               is_inactive = true;
+
+       return is_inactive;
+}
+
 /*
  * Convert CPU state data saved at the time of crash into ELF notes.
+ *
+ * Each register entry is of 16 bytes, A numerical identifier along with
+ * a GPR/SPR flag in the first 8 bytes and the register value in the next
+ * 8 bytes. For more details refer to F/W documentation.
  */
 static int __init opal_fadump_build_cpu_notes(struct fw_dump *fadump_conf)
 {
        u32 num_cpus, *note_buf;
        struct fadump_crash_info_header *fdh = NULL;
+       struct hdat_fadump_thread_hdr *thdr;
+       unsigned long addr;
+       u32 thread_pir;
+       char *bufp;
+       struct pt_regs regs;
+       unsigned int size_of_each_thread;
+       unsigned int regs_offset, regs_cnt, reg_esize;
+       int i;
+
+       if ((fadump_conf->cpu_state_destination_addr == 0) ||
+           (fadump_conf->cpu_state_entry_size == 0)) {
+               pr_err("CPU state data not available for processing!\n");
+               return -ENODEV;
+       }
+
+       size_of_each_thread = fadump_conf->cpu_state_entry_size;
+       num_cpus = (fadump_conf->cpu_state_data_size / size_of_each_thread);
+
+       addr = fadump_conf->cpu_state_destination_addr;
+       bufp = __va(addr);
+
+       /*
+        * Offset for register entries, entry size and registers count is
+        * duplicated in every thread header in keeping with HDAT format.
+        * Use these values from the first thread header.
+        */
+       thdr = (struct hdat_fadump_thread_hdr *)bufp;
+       regs_offset = (offsetof(struct hdat_fadump_thread_hdr, offset) +
+                      be32_to_cpu(thdr->offset));
+       reg_esize = be32_to_cpu(thdr->esize);
+       regs_cnt  = be32_to_cpu(thdr->ecnt);
 
-       num_cpus = 1;
        /* Allocate buffer to hold cpu crash notes. */
        fadump_conf->cpu_notes_buf_size = num_cpus * sizeof(note_buf_t);
        fadump_conf->cpu_notes_buf_size =
@@ -260,10 +361,53 @@ static int __init opal_fadump_build_cpu_notes(struct 
fw_dump *fadump_conf)
        if (fadump_conf->fadumphdr_addr)
                fdh = __va(fadump_conf->fadumphdr_addr);
 
-       if (fdh && (fdh->crashing_cpu != FADUMP_CPU_UNKNOWN)) {
-               note_buf = fadump_regs_to_elf_notes(note_buf, &(fdh->regs));
-               final_note(note_buf);
+       pr_debug("--------CPU State Data------------\n");
+       pr_debug("NumCpus     : %u\n", num_cpus);
+       pr_debug("\tOffset: %u, Entry size: %u, Cnt: %u\n",
+                regs_offset, reg_esize, regs_cnt);
+
+       for (i = 0; i < num_cpus; i++, bufp += size_of_each_thread) {
+               thdr = (struct hdat_fadump_thread_hdr *)bufp;
+
+               thread_pir = be32_to_cpu(thdr->pir);
+               pr_debug("%04d) PIR: 0x%x, core state: 0x%02x\n",
+                        (i + 1), thread_pir, thdr->core_state);
+
+               /*
+                * Register state data of MAX cores is provided by firmware,
+                * but some of this cores may not be active. So, while
+                * processing register state data, check core state and
+                * skip threads that belong to inactive cores.
+                */
+               if (is_thread_core_inactive(thdr->core_state))
+                       continue;
+
+               /*
+                * If this is kernel initiated crash, crashing_cpu would be set
+                * appropriately and register data of the crashing CPU saved by
+                * crashing kernel. Add this saved register data of crashing CPU
+                * to elf notes and populate the pt_regs for the remaining CPUs
+                * from register state data provided by firmware.
+                */
+               if (fdh && (fdh->crashing_cpu == thread_pir)) {
+                       note_buf = fadump_regs_to_elf_notes(note_buf,
+                                                           &fdh->regs);
+                       pr_debug("Crashing CPU PIR: 0x%x - R1 : 0x%lx, NIP : 
0x%lx\n",
+                                fdh->crashing_cpu, fdh->regs.gpr[1],
+                                fdh->regs.nip);
+                       continue;
+               }
+
+               opal_fadump_read_regs((bufp + regs_offset), regs_cnt,
+                                     reg_esize, &regs);
 
+               note_buf = fadump_regs_to_elf_notes(note_buf, &regs);
+               pr_debug("CPU PIR: 0x%x - R1 : 0x%lx, NIP : 0x%lx\n",
+                        thread_pir, regs.gpr[1], regs.nip);
+       }
+       final_note(note_buf);
+
+       if (fdh) {
                pr_debug("Updating elfcore header (%llx) with cpu notes\n",
                         fdh->elfcorehdr_addr);
                fadump_update_elfcore_header(fadump_conf,
@@ -278,7 +422,8 @@ static int __init opal_fadump_process_fadump(struct fw_dump 
*fadump_conf)
        struct fadump_crash_info_header *fdh;
        int rc = 0;
 
-       if (!opal_fdm_active || !fadump_conf->fadumphdr_addr)
+       if (!opal_fdm_active || !opal_cpu_metadata ||
+           !fadump_conf->fadumphdr_addr)
                return -EINVAL;
 
        /* Validate the fadump crash info header */
@@ -288,13 +433,6 @@ static int __init opal_fadump_process_fadump(struct 
fw_dump *fadump_conf)
                return -EINVAL;
        }
 
-       /*
-        * TODO: To build cpu notes, find a way to map PIR to logical id.
-        *       Also, we may need different method for pseries and powernv.
-        *       The currently booted kernel could have a different PIR to
-        *       logical id mapping. So, try saving info of previous kernel's
-        *       paca to get the right PIR to logical id mapping.
-        */
        rc = opal_fadump_build_cpu_notes(fadump_conf);
        if (rc)
                return rc;
@@ -348,6 +486,14 @@ static void opal_fadump_trigger(struct 
fadump_crash_info_header *fdh,
 {
        int rc;
 
+       /*
+        * Unlike on pSeries platform, logical CPU number is not provided
+        * with architected register state data. So, store the crashing
+        * CPU's PIR instead to plug the appropriate register data for
+        * crashing CPU in the vmcore file.
+        */
+       fdh->crashing_cpu = (u32)mfspr(SPRN_PIR);
+
        rc = opal_cec_reboot2(OPAL_REBOOT_MPIPL, msg);
        if (rc == OPAL_UNSUPPORTED) {
                pr_emerg("Reboot type %d not supported.\n",
@@ -406,6 +552,7 @@ int __init opal_fadump_dt_scan(struct fw_dump *fadump_conf, 
ulong node)
                u64 addr = 0;
                s64 ret;
                const struct opal_fadump_mem_struct *r_opal_fdm_active;
+               const struct opal_mpipl_fadump *r_opal_cpu_metadata;
 
                ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_KERNEL, &addr);
                if ((ret != OPAL_SUCCESS) || !addr) {
@@ -430,6 +577,32 @@ int __init opal_fadump_dt_scan(struct fw_dump 
*fadump_conf, ulong node)
                        return 1;
                }
 
+               ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_CPU, &addr);
+               if ((ret != OPAL_SUCCESS) || !addr) {
+                       pr_err("Failed to get CPU metadata (%lld)\n", ret);
+                       return 1;
+               }
+
+               addr = be64_to_cpu(addr);
+               pr_debug("CPU metadata addr: %llx\n", addr);
+
+               opal_cpu_metadata = __va(addr);
+               r_opal_cpu_metadata = (void *)addr;
+               fadump_conf->cpu_state_data_version =
+                       be32_to_cpu(r_opal_cpu_metadata->cpu_data_version);
+               if (fadump_conf->cpu_state_data_version !=
+                   HDAT_FADUMP_CPU_DATA_VERSION) {
+                       pr_err("CPU data format version (%lu) mismatch!\n",
+                              fadump_conf->cpu_state_data_version);
+                       return 1;
+               }
+               fadump_conf->cpu_state_entry_size =
+                       be32_to_cpu(r_opal_cpu_metadata->cpu_data_size);
+               fadump_conf->cpu_state_destination_addr =
+                       be64_to_cpu(r_opal_cpu_metadata->region[0].dest);
+               fadump_conf->cpu_state_data_size =
+                       be64_to_cpu(r_opal_cpu_metadata->region[0].size);
+
                pr_info("Firmware-assisted dump is active.\n");
                fadump_conf->dump_active = 1;
                opal_fadump_get_config(fadump_conf, r_opal_fdm_active);
diff --git a/arch/powerpc/platforms/powernv/opal-fadump.h 
b/arch/powerpc/platforms/powernv/opal-fadump.h
index 423c9b2..7c44aba 100644
--- a/arch/powerpc/platforms/powernv/opal-fadump.h
+++ b/arch/powerpc/platforms/powernv/opal-fadump.h
@@ -31,4 +31,43 @@ struct opal_fadump_mem_struct {
        struct opal_mpipl_region        rgn[FADUMP_MAX_MEM_REGS];
 } __attribute__((packed));
 
+/*
+ * CPU state data is provided by f/w. Below are the definitions
+ * provided in HDAT spec. Refer to latest HDAT specification for
+ * any update to this format.
+ */
+
+#define HDAT_FADUMP_CPU_DATA_VERSION           1
+
+#define HDAT_FADUMP_CORE_INACTIVE              (0x0F)
+
+/* HDAT thread header for register entries */
+struct hdat_fadump_thread_hdr {
+       __be32  pir;
+       /* 0x00 - 0x0F - The corresponding stop state of the core */
+       u8      core_state;
+       u8      reserved[3];
+
+       __be32  offset; /* Offset to Register Entries array */
+       __be32  ecnt;   /* Number of entries */
+       __be32  esize;  /* Alloc size of each array entry in bytes */
+       __be32  eactsz; /* Actual size of each array entry in bytes */
+} __attribute__((packed));
+
+/* Register types populated by f/w */
+#define HDAT_FADUMP_REG_TYPE_GPR               0x01
+#define HDAT_FADUMP_REG_TYPE_SPR               0x02
+
+/* ID numbers used by f/w while populating certain registers */
+#define HDAT_FADUMP_REG_ID_NIP                 0x7D0
+#define HDAT_FADUMP_REG_ID_MSR                 0x7D1
+#define HDAT_FADUMP_REG_ID_CCR                 0x7D2
+
+/* HDAT register entry. */
+struct hdat_fadump_reg_entry {
+       __be32          reg_type;
+       __be32          reg_num;
+       __be64          reg_val;
+} __attribute__((packed));
+
 #endif /* __PPC64_OPAL_FA_DUMP_H__ */

Reply via email to