On 29/04/26 12:21 PM, Shivang Upadhyay wrote:
When a ppc machine is started with maxcpus greater than the number of
present CPUs (e.g., -smp cpus=2,maxcpus=8), firmware allocates the CPU
state data buffer based on maxcpus and advertises this size to the
kernel during fadump registration.

However, fadump currently fails to generate a vmcore during the
rebooted kernel because:

1. The CPU state buffer length does not match what firmware advertised
    (QEMU only populates entries for present CPUs, not maxcpus)
2. The kernel cannot find CPU data for all maxcpus slots

This patch adds placeholder entries for the unrealized cpus.
The placeholder entries contain only the CPUSTRT and CPUEND markers
with the CPU ID, which is sufficient for the kernel to recognize the
CPU slot exists even though the CPU was never realized.

This ensures the CPU state buffer size and structure matches what
firmware advertised to the kernel, allowing fadump to successfully
generate vmcore.

Cc: Aditya Gupta <[email protected]>
Cc: Sourabh Jain <[email protected]>
Cc: Mahesh J Salgaonkar <[email protected]>
Cc: Harsh Prateek Bora <[email protected]>
Reported-by: Anushree Mathur <[email protected]>
Signed-off-by: Shivang Upadhyay <[email protected]>
---
  hw/ppc/spapr_fadump.c | 57 +++++++++++++++++++++++++++++++++----------
  1 file changed, 44 insertions(+), 13 deletions(-)

diff --git a/hw/ppc/spapr_fadump.c b/hw/ppc/spapr_fadump.c
index 13cab0cfe1..b1a21ff115 100644
--- a/hw/ppc/spapr_fadump.c
+++ b/hw/ppc/spapr_fadump.c
@@ -309,6 +309,30 @@ static bool do_preserve_region(FadumpSection *region)
      return true;
  }

+/*
+ * Populate placeholder register entries for an unrealized CPU
+ *
+ * For CPUs that are not realized (cpu_index < maxcpus), we need to
+ * create minimal register entries containing only CPUSTRT and CPUEND
+ * markers to maintain the expected buffer structure.
+ *
+ * Returns pointer just past the placeholder entries, which can be used
+ * as the start address for the next CPU's register entries
+ */
+static FadumpRegEntry *populate_cpu_reg_entries_placeholder(int cpu_id,
+                                                             FadumpRegEntry 
*curr_reg_entry)
+{
+    curr_reg_entry->reg_id = cpu_to_be64(fadump_str_to_u64("CPUSTRT"));
+    curr_reg_entry->reg_value = cpu_to_be64(cpu_id & FADUMP_CPU_ID_MASK);
+    ++curr_reg_entry;
+
+    curr_reg_entry->reg_id = cpu_to_be64(fadump_str_to_u64("CPUEND"));
+    curr_reg_entry->reg_value = cpu_to_be64(cpu_id & FADUMP_CPU_ID_MASK);
+    ++curr_reg_entry;
+
+    return curr_reg_entry;
+}
+
  /*
   * Populate the passed CPUs register entries, in the buffer starting at
   * the argument 'curr_reg_entry'
@@ -450,7 +474,7 @@ static FadumpRegEntry *populate_cpu_reg_entries(CPUState 
*cpu,
   * callers), and sets the size of this buffer in the argument
   * 'cpu_state_len'
   */
-static void *get_cpu_state_data(uint64_t *cpu_state_len)
+static void *get_cpu_state_data(uint64_t *cpu_state_len, MachineState *ms)
  {
      FadumpRegSaveAreaHeader reg_save_hdr;
      g_autofree FadumpRegEntry *reg_entries = NULL;
@@ -459,15 +483,11 @@ static void *get_cpu_state_data(uint64_t *cpu_state_len)

      uint32_t num_reg_entries;
      uint32_t reg_entries_size;
-    uint32_t num_cpus = 0;
+    uint32_t num_cpus = ms->smp.max_cpus;

      void *cpu_state_buffer = NULL;
      uint64_t offset = 0;

-    CPU_FOREACH(cpu) {
-        ++num_cpus;
-    }
-
      reg_save_hdr.version = cpu_to_be32(0);
      reg_save_hdr.magic_number =
          cpu_to_be64(fadump_str_to_u64("REGSAVE"));
@@ -484,10 +504,20 @@ static void *get_cpu_state_data(uint64_t *cpu_state_len)
      /* Pointer to current CPU's registers */
      curr_reg_entry = reg_entries;

-    /* Populate register entries for all CPUs */
-    CPU_FOREACH(cpu) {
-        cpu_synchronize_state(cpu);
-        curr_reg_entry = populate_cpu_reg_entries(cpu, curr_reg_entry);
+    /*
+     * Populate register entries for all CPU slots (0 to maxcpus-1)
+     */
+    for (int i = 0; i < num_cpus; i++) {
+        cpu = qemu_get_cpu(i);
+        if (cpu) {
+            /* CPU is realized - populate full register entries */
+            cpu_synchronize_state(cpu);
+            curr_reg_entry = populate_cpu_reg_entries(cpu, curr_reg_entry);
+        } else {
+            /* CPU slot is empty - populate placeholder entries */
+            curr_reg_entry = populate_cpu_reg_entries_placeholder(i,
+                                                                   
curr_reg_entry);
+        }
      }

      *cpu_state_len = 0;
@@ -526,7 +556,7 @@ static void *get_cpu_state_data(uint64_t *cpu_state_len)
   * such as invalid destination address or non-fatal error errors likely
   * caused due to invalid parameters, return true and set region->error_flags
   */
-static bool do_populate_cpu_state(FadumpSection *region)
+static bool do_populate_cpu_state(FadumpSection *region, MachineState *ms)
  {
      uint64_t dest_addr = be64_to_cpu(region->destination_address);
      uint64_t cpu_state_len = 0;
@@ -541,7 +571,7 @@ static bool do_populate_cpu_state(FadumpSection *region)
      attrs.user = 0;
      attrs.memory = 1;

-    cpu_state_buffer  = get_cpu_state_data(&cpu_state_len);
+    cpu_state_buffer  = get_cpu_state_data(&cpu_state_len, ms);

      io_result = address_space_write(default_as, dest_addr, attrs,
              cpu_state_buffer, cpu_state_len);
@@ -597,6 +627,7 @@ static bool do_populate_cpu_state(FadumpSection *region)
  static bool fadump_preserve_mem(SpaprMachineState *spapr)
  {
      FadumpMemStruct *fdm = &spapr->registered_fdm;
+    MachineState *ms = MACHINE(spapr);
      uint16_t dump_num_sections, data_type;

      assert(spapr->fadump_registered);
@@ -627,7 +658,7 @@ static bool fadump_preserve_mem(SpaprMachineState *spapr)

          switch (data_type) {
          case FADUMP_CPU_STATE_DATA:
-            if (!do_populate_cpu_state(&fdm->rgn[i])) {
+            if (!do_populate_cpu_state(&fdm->rgn[i], ms)) {
                  qemu_log_mask(LOG_GUEST_ERROR,
                      "FADump: Failed to store CPU State Data");
                  fdm->header.dump_status_flag |=


Hi Shivang,
Thanks for working on it, but patch is fixing the issue only for thread as 1 for guest topology but breaking for 2 or 4 or 8 as threads in guest.

Putting the details about the issue here.

As I reported when maxcpus tag is present, fadump is resulting
into multiple Kernel OOPS along with rtas error message "rtas fadump: Dump taken by platform is incomplete (0)".

Kernel OOPS are like following. Some kernel OOPS are for vmalloc memory, some are for cpu alloc etc in continuous loops and in the end guest crashed.

[    7.976103][  T775] BUG: Unable to handle kernel data access at 0x629cb8ee54efc16f
[    7.976260][  T775] Faulting instruction address: 0xc000000000643e7c
[    7.976357][  T775] Oops: Kernel access of bad area, sig: 11 [#1]

[    7.978143][  T775] GPR28: 0000000000010000 629cb8ee54efc16f 0000000000000008 c000000002e56a00 [    7.979257][  T775] NIP [c000000000643e7c] __kmalloc_node_noprof+0x21c/0x650 [    7.979402][  T775] LR [c000000000643e64] __kmalloc_node_noprof+0x204/0x650
[    7.979504][  T775] Call Trace:
[    7.979565][  T775] [c0000003fee5b620] [c008000000000000] 0xc008000000000000 (unreliable) [    7.979688][  T775] [c0000003fee5b6b0] [c000000000614ae8] __vmalloc_node_range_noprof+0x208/0x990 [    7.979831][  T775] [c0000003fee5b830] [c000000000615564] __vmalloc_noprof+0x54/0x70 [    7.979956][  T775] [c0000003fee5b8a0] [c000000000494cec] bpf_prog_alloc_no_stats+0x5c/0x2c0 [    7.980098][  T775] [c0000003fee5b8f0] [c000000000495028] bpf_prog_alloc+0xd8/0x120 [    7.980220][  T775] [c0000003fee5b930] [c000000000eff194] bpf_prog_create_from_user+0x74/0x190 [    7.980378][  T775] [c0000003fee5b9a0] [c0000000003dd770] seccomp_set_mode_filter+0x190/0xd20 [    7.980524][  T775] [c0000003fee5ba40] [c00000000002e410] system_call_exception+0x130/0x2a0 [    7.980649][  T775] [c0000003fee5be50] [c00000000000cfdc] system_call_vectored_common+0x15c/0x2ec
[    7.980791][  T775] ---- interrupt: 3000 at 0x7fffb6d6b3e4



[    7.805704][    C1] BUG: Unable to handle kernel instruction fetch (NULL pointer?)
[    7.805866][    C1] Faulting instruction address: 0x00000000
[    7.805976][    C1] Oops: Kernel access of bad area, sig: 7 [#1]
[    7.811066][    C1] [c000000003f9bd40] [c0000000012262c0] cpuidle_enter_state+0x440/0x6f8 [    7.811069][    C1] [c000000003f9bde0] [c000000000e1ab80] cpuidle_enter+0x50/0x80 [    7.811074][    C1] [c000000003f9be20] [c0000000002a3d28] call_cpuidle+0x48/0x90 [    7.811077][    C1] [c000000003f9be40] [c0000000002ab8d0] do_idle+0x2f0/0x3a0 [    7.811080][    C1] [c000000003f9beb0] [c0000000002abc4c] cpu_startup_entry+0x4c/0x50 [    7.811082][    C1] [c000000003f9bee0] [c000000000058dbc] start_secondary+0x6bc/0xe30 [    7.811086][    C1] [c000000003f9bfe0] [c00000000000e158] start_secondary_prolog+0x10/0x14 [    7.811089][    C1] Code: XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX
[    7.813545][    C1] ---[ end trace 0000000000000000 ]---
[    7.818134][    C1] pstore: backend (nvram) writing error (-1)




After applying the patch the issue is getting fixed only for the scenario where topology is having thread "1" for the guest, but when I give threads as 2 or 4 or 8 for the guest topology it is still crashing same as above mentioned. So the patch is
fixing the issue for partial scenarios. Some scenarios are still breaking.

Here I changed threads to 2 or 4 or 8 (-smp 8,maxcpus=64,sockets=2,dies=1,clusters=1,cores=32,threads=1)

Thanks,
Anushree Mathur

Reply via email to