On 29/05/2024 17.43, jro...@linux.ibm.com wrote:
From: Jared Rossi <jro...@linux.ibm.com>

Add a routine for loading the next IPLB if a device fails to boot.

This includes some minor changes to the List-Directed IPL routine so that the
failing device may be retried using the legacy boot pointers before moving on to
the next device.

Signed-off-by: Jared Rossi <jro...@linux.ibm.com>
---
...
diff --git a/pc-bios/s390-ccw/bootmap.c b/pc-bios/s390-ccw/bootmap.c
index a2137449dc..69391557fa 100644
--- a/pc-bios/s390-ccw/bootmap.c
+++ b/pc-bios/s390-ccw/bootmap.c
@@ -144,7 +144,10 @@ static block_number_t load_eckd_segments(block_number_t 
blk, bool ldipl,
      bool more_data;
memset(_bprs, FREE_SPACE_FILLER, sizeof(_bprs));
-    read_block(blk, bprs, "BPRS read failed");
+    if (!read_block_nonfatal(blk, bprs)) {
+        IPL_assert(ldipl, "BPRS read failed");
+        return -1;
+    }
do {
          more_data = false;
@@ -188,7 +191,10 @@ static block_number_t load_eckd_segments(block_number_t 
blk, bool ldipl,
                   * I.e. the next ptr must point to the unused memory area
                   */
                  memset(_bprs, FREE_SPACE_FILLER, sizeof(_bprs));
-                read_block(block_nr, bprs, "BPRS continuation read failed");
+                if (!read_block_nonfatal(block_nr, bprs)) {
+                    IPL_assert(ldipl, "BPRS continuation read failed");
+                    break;
+                }
                  more_data = true;
                  break;
              }
@@ -197,7 +203,10 @@ static block_number_t load_eckd_segments(block_number_t 
blk, bool ldipl,
               * to memory (address).
               */
              rc = virtio_read_many(block_nr, (void *)(*address), count + 1);
-            IPL_assert(rc == 0, "code chunk read failed");
+            if (rc != 0) {
+                IPL_assert(ldipl, "code chunk read failed");
+                break;
+            }
*address += (count + 1) * virtio_get_block_size();
          }
@@ -295,13 +304,22 @@ static void run_eckd_boot_script(block_number_t 
bmt_block_nr,
                 " maximum number of boot entries allowed");
memset(sec, FREE_SPACE_FILLER, sizeof(sec));
-    read_block(bmt_block_nr, sec, "Cannot read Boot Map Table");
+    if (!read_block_nonfatal(bmt_block_nr, sec)) {
+        IPL_assert(ldipl, "Cannot read Boot Map Table");
+        return;
+    }
block_nr = gen_eckd_block_num(&bmt->entry[loadparm].xeckd, ldipl);
-    IPL_assert(block_nr != -1, "Cannot find Boot Map Table Entry");
+    if (block_nr == -1) {
+        IPL_assert(ldipl, "Cannot find Boot Map Table Entry");
+        return;
+    }
memset(sec, FREE_SPACE_FILLER, sizeof(sec));
-    read_block(block_nr, sec, "Cannot read Boot Map Script");
+    if (!read_block_nonfatal(block_nr, sec)) {
+        IPL_assert(ldipl, "Cannot read Boot Map Script");
+        return;
+    }
for (i = 0; bms->entry[i].type == BOOT_SCRIPT_LOAD ||
                  bms->entry[i].type == BOOT_SCRIPT_SIGNATURE; i++) {
@@ -319,13 +337,10 @@ static void run_eckd_boot_script(block_number_t 
bmt_block_nr,
          } while (block_nr != -1);
      }
- if (ldipl && bms->entry[i].type != BOOT_SCRIPT_EXEC) {
-        /* Abort LD-IPL and retry as CCW-IPL */
+    if (bms->entry[i].type != BOOT_SCRIPT_EXEC) {
+        IPL_assert(ldipl, "Unknown script entry type");
          return;
      }
-
-    IPL_assert(bms->entry[i].type == BOOT_SCRIPT_EXEC,
-               "Unknown script entry type");
      write_reset_psw(bms->entry[i].address.load_address); /* no return */
      jump_to_IPL_code(0); /* no return */
  }
@@ -492,7 +507,7 @@ static void ipl_eckd(void)
              /* LD-IPL does not use the S1B bock, just make it NULL */
              run_eckd_boot_script(ldipl_bmt, NULL_BLOCK_NR);
              /* Only return in error, retry as CCW-IPL */
-            sclp_print("Retrying IPL ");
+            sclp_print("LD-IPL failed, retrying device\n");
              print_eckd_msg();
          }
          memset(sec, FREE_SPACE_FILLER, sizeof(sec));
@@ -944,5 +959,5 @@ void zipl_load(void)
          panic("\n! Unknown IPL device type !\n");
      }
- sclp_print("zIPL load failed.\n");
+    panic("zIPL load failed.\n");

Why replace the sclp_print() with a panic() here? Wouldn't it be nicer to keep panicking at the call site instead?

  }
diff --git a/pc-bios/s390-ccw/main.c b/pc-bios/s390-ccw/main.c
index 3e51d698d7..248ed5a410 100644
--- a/pc-bios/s390-ccw/main.c
+++ b/pc-bios/s390-ccw/main.c
@@ -53,6 +53,12 @@ unsigned int get_loadparm_index(void)
      return atoui(loadparm_str);
  }
+static void copy_qipl(void)
+{
+    QemuIplParameters *early_qipl = (QemuIplParameters *)QIPL_ADDRESS;
+    memcpy(&qipl, early_qipl, sizeof(QemuIplParameters));
+}

You could move this function as a static inline into iplb.h ...

...
diff --git a/pc-bios/s390-ccw/netmain.c b/pc-bios/s390-ccw/netmain.c
index 5cd619b2d6..65cee15fef 100644
--- a/pc-bios/s390-ccw/netmain.c
+++ b/pc-bios/s390-ccw/netmain.c
@@ -36,6 +36,7 @@
  #include "cio.h"
  #include "virtio.h"
  #include "s390-time.h"
+#include "iplb.h"
#define DEFAULT_BOOT_RETRIES 10
  #define DEFAULT_TFTP_RETRIES 20
@@ -51,6 +52,7 @@ void write_iplb_location(void) {}
  #define STSI322_VMDB_UUID_OFFSET ((8 + 12) * 4)
IplParameterBlock iplb __attribute__((aligned(PAGE_SIZE)));
+QemuIplParameters qipl;
  static char cfgbuf[2048];
static SubChannelId net_schid = { .one = 1 };
@@ -513,6 +515,8 @@ void main(void)
  {
      filename_ip_t fn_ip;
      int rc, fnlen;
+    QemuIplParameters *early_qipl = (QemuIplParameters *)QIPL_ADDRESS;
+    memcpy(&qipl, early_qipl, sizeof(QemuIplParameters));

... then you could use copy_qipl() here, too.

 Thomas



Reply via email to