Re: [PATCH 4/5] s390x: Add boot device fallback infrastructure
On 05/06/2024 10.20, Thomas Huth wrote: On 29/05/2024 17.43, jro...@linux.ibm.com wrote: From: Jared Rossi Add a routine for loading the next IPLB if a device fails to boot. This includes some minor changes to the List-Directed IPL routine so that the failing device may be retried using the legacy boot pointers before moving on to the next device. Signed-off-by: Jared Rossi --- ... diff --git a/pc-bios/s390-ccw/bootmap.c b/pc-bios/s390-ccw/bootmap.c index a2137449dc..69391557fa 100644 --- a/pc-bios/s390-ccw/bootmap.c +++ b/pc-bios/s390-ccw/bootmap.c @@ -144,7 +144,10 @@ static block_number_t load_eckd_segments(block_number_t blk, bool ldipl, bool more_data; memset(_bprs, FREE_SPACE_FILLER, sizeof(_bprs)); - read_block(blk, bprs, "BPRS read failed"); + if (!read_block_nonfatal(blk, bprs)) { + IPL_assert(ldipl, "BPRS read failed"); + return -1; + } do { more_data = false; @@ -188,7 +191,10 @@ static block_number_t load_eckd_segments(block_number_t blk, bool ldipl, * I.e. the next ptr must point to the unused memory area */ memset(_bprs, FREE_SPACE_FILLER, sizeof(_bprs)); - read_block(block_nr, bprs, "BPRS continuation read failed"); + if (!read_block_nonfatal(block_nr, bprs)) { + IPL_assert(ldipl, "BPRS continuation read failed"); + break; + } more_data = true; break; } @@ -197,7 +203,10 @@ static block_number_t load_eckd_segments(block_number_t blk, bool ldipl, * to memory (address). 
*/ rc = virtio_read_many(block_nr, (void *)(*address), count + 1); - IPL_assert(rc == 0, "code chunk read failed"); + if (rc != 0) { + IPL_assert(ldipl, "code chunk read failed"); + break; + } *address += (count + 1) * virtio_get_block_size(); } @@ -295,13 +304,22 @@ static void run_eckd_boot_script(block_number_t bmt_block_nr, " maximum number of boot entries allowed"); memset(sec, FREE_SPACE_FILLER, sizeof(sec)); - read_block(bmt_block_nr, sec, "Cannot read Boot Map Table"); + if (!read_block_nonfatal(bmt_block_nr, sec)) { + IPL_assert(ldipl, "Cannot read Boot Map Table"); + return; + } block_nr = gen_eckd_block_num(&bmt->entry[loadparm].xeckd, ldipl); - IPL_assert(block_nr != -1, "Cannot find Boot Map Table Entry"); + if (block_nr == -1) { + IPL_assert(ldipl, "Cannot find Boot Map Table Entry"); + return; + } memset(sec, FREE_SPACE_FILLER, sizeof(sec)); - read_block(block_nr, sec, "Cannot read Boot Map Script"); + if (!read_block_nonfatal(block_nr, sec)) { + IPL_assert(ldipl, "Cannot read Boot Map Script"); + return; + } for (i = 0; bms->entry[i].type == BOOT_SCRIPT_LOAD || bms->entry[i].type == BOOT_SCRIPT_SIGNATURE; i++) { @@ -319,13 +337,10 @@ static void run_eckd_boot_script(block_number_t bmt_block_nr, } while (block_nr != -1); } - if (ldipl && bms->entry[i].type != BOOT_SCRIPT_EXEC) { - /* Abort LD-IPL and retry as CCW-IPL */ + if (bms->entry[i].type != BOOT_SCRIPT_EXEC) { + IPL_assert(ldipl, "Unknown script entry type"); return; } - - IPL_assert(bms->entry[i].type == BOOT_SCRIPT_EXEC, - "Unknown script entry type"); write_reset_psw(bms->entry[i].address.load_address); /* no return */ jump_to_IPL_code(0); /* no return */ } @@ -492,7 +507,7 @@ static void ipl_eckd(void) /* LD-IPL does not use the S1B bock, just make it NULL */ run_eckd_boot_script(ldipl_bmt, NULL_BLOCK_NR); /* Only return in error, retry as CCW-IPL */ - sclp_print("Retrying IPL "); + sclp_print("LD-IPL failed, retrying device\n"); print_eckd_msg(); } memset(sec, FREE_SPACE_FILLER, sizeof(sec)); 
@@ -944,5 +959,5 @@ void zipl_load(void) panic("\n! Unknown IPL device type !\n"); } - sclp_print("zIPL load failed.\n"); + panic("zIPL load failed.\n"); Why replacing the sclp_print() here? Wouldn't it be nicer to continue panicking on the calling site instead? Ok, after looking at the 5th patch, I think I understand it now: panic() is not fatal anymore and might restart with the next boot device... not sure whether I like that, but let's discuss that on patch 5 instead... Thomas
Re: [PATCH 4/5] s390x: Add boot device fallback infrastructure
On 29/05/2024 17.43, jro...@linux.ibm.com wrote: From: Jared Rossi Add a routine for loading the next IPLB if a device fails to boot. This includes some minor changes to the List-Directed IPL routine so that the failing device may be retried using the legacy boot pointers before moving on to the next device. Signed-off-by: Jared Rossi --- ... diff --git a/pc-bios/s390-ccw/bootmap.c b/pc-bios/s390-ccw/bootmap.c index a2137449dc..69391557fa 100644 --- a/pc-bios/s390-ccw/bootmap.c +++ b/pc-bios/s390-ccw/bootmap.c @@ -144,7 +144,10 @@ static block_number_t load_eckd_segments(block_number_t blk, bool ldipl, bool more_data; memset(_bprs, FREE_SPACE_FILLER, sizeof(_bprs)); -read_block(blk, bprs, "BPRS read failed"); +if (!read_block_nonfatal(blk, bprs)) { +IPL_assert(ldipl, "BPRS read failed"); +return -1; +} do { more_data = false; @@ -188,7 +191,10 @@ static block_number_t load_eckd_segments(block_number_t blk, bool ldipl, * I.e. the next ptr must point to the unused memory area */ memset(_bprs, FREE_SPACE_FILLER, sizeof(_bprs)); -read_block(block_nr, bprs, "BPRS continuation read failed"); +if (!read_block_nonfatal(block_nr, bprs)) { +IPL_assert(ldipl, "BPRS continuation read failed"); +break; +} more_data = true; break; } @@ -197,7 +203,10 @@ static block_number_t load_eckd_segments(block_number_t blk, bool ldipl, * to memory (address). 
*/ rc = virtio_read_many(block_nr, (void *)(*address), count + 1); -IPL_assert(rc == 0, "code chunk read failed"); +if (rc != 0) { +IPL_assert(ldipl, "code chunk read failed"); +break; +} *address += (count + 1) * virtio_get_block_size(); } @@ -295,13 +304,22 @@ static void run_eckd_boot_script(block_number_t bmt_block_nr, " maximum number of boot entries allowed"); memset(sec, FREE_SPACE_FILLER, sizeof(sec)); -read_block(bmt_block_nr, sec, "Cannot read Boot Map Table"); +if (!read_block_nonfatal(bmt_block_nr, sec)) { +IPL_assert(ldipl, "Cannot read Boot Map Table"); +return; +} block_nr = gen_eckd_block_num(&bmt->entry[loadparm].xeckd, ldipl); -IPL_assert(block_nr != -1, "Cannot find Boot Map Table Entry"); +if (block_nr == -1) { +IPL_assert(ldipl, "Cannot find Boot Map Table Entry"); +return; +} memset(sec, FREE_SPACE_FILLER, sizeof(sec)); -read_block(block_nr, sec, "Cannot read Boot Map Script"); +if (!read_block_nonfatal(block_nr, sec)) { +IPL_assert(ldipl, "Cannot read Boot Map Script"); +return; +} for (i = 0; bms->entry[i].type == BOOT_SCRIPT_LOAD || bms->entry[i].type == BOOT_SCRIPT_SIGNATURE; i++) { @@ -319,13 +337,10 @@ static void run_eckd_boot_script(block_number_t bmt_block_nr, } while (block_nr != -1); } -if (ldipl && bms->entry[i].type != BOOT_SCRIPT_EXEC) { -/* Abort LD-IPL and retry as CCW-IPL */ +if (bms->entry[i].type != BOOT_SCRIPT_EXEC) { +IPL_assert(ldipl, "Unknown script entry type"); return; } - -IPL_assert(bms->entry[i].type == BOOT_SCRIPT_EXEC, - "Unknown script entry type"); write_reset_psw(bms->entry[i].address.load_address); /* no return */ jump_to_IPL_code(0); /* no return */ } @@ -492,7 +507,7 @@ static void ipl_eckd(void) /* LD-IPL does not use the S1B bock, just make it NULL */ run_eckd_boot_script(ldipl_bmt, NULL_BLOCK_NR); /* Only return in error, retry as CCW-IPL */ -sclp_print("Retrying IPL "); +sclp_print("LD-IPL failed, retrying device\n"); print_eckd_msg(); } memset(sec, FREE_SPACE_FILLER, sizeof(sec)); @@ -944,5 +959,5 @@ void 
zipl_load(void) panic("\n! Unknown IPL device type !\n"); } -sclp_print("zIPL load failed.\n"); +panic("zIPL load failed.\n"); Why replacing the sclp_print() here? Wouldn't it be nicer to continue panicking on the calling site instead? } diff --git a/pc-bios/s390-ccw/main.c b/pc-bios/s390-ccw/main.c index 3e51d698d7..248ed5a410 100644 --- a/pc-bios/s390-ccw/main.c +++ b/pc-bios/s390-ccw/main.c @@ -53,6 +53,12 @@ unsigned int get_loadparm_index(void) return atoui(loadparm_str); } +static void copy_qipl(void) +{ +QemuIplParameters *early_qipl = (QemuIplParameters *)QIPL_ADDRESS; +memcpy(&qipl, early_qipl, sizeof(QemuIplParameters)); +} You could move this function as a static inline into iplb.h ... ... diff --git a/pc-bios/s390-ccw/netmain.c b/pc-bios/s390-ccw/netmain.c index 5cd619b2d6..65cee15fef 100644 ---
[PATCH 4/5] s390x: Add boot device fallback infrastructure
From: Jared Rossi Add a routine for loading the next IPLB if a device fails to boot. This includes some minor changes to the List-Directed IPL routine so that the failing device may be retried using the legacy boot pointers before moving on to the next device. Signed-off-by: Jared Rossi --- pc-bios/s390-ccw/bootmap.h | 5 + pc-bios/s390-ccw/iplb.h| 24 ++ pc-bios/s390-ccw/bootmap.c | 41 ++ pc-bios/s390-ccw/main.c| 15 +- pc-bios/s390-ccw/netmain.c | 4 5 files changed, 71 insertions(+), 18 deletions(-) diff --git a/pc-bios/s390-ccw/bootmap.h b/pc-bios/s390-ccw/bootmap.h index d4690a88c2..d5061ed6c8 100644 --- a/pc-bios/s390-ccw/bootmap.h +++ b/pc-bios/s390-ccw/bootmap.h @@ -366,6 +366,11 @@ static inline void read_block(block_number_t blockno, IPL_assert(virtio_read(blockno, buffer) == 0, errmsg); } +static inline bool read_block_nonfatal(block_number_t blockno, void *buffer) +{ +return (virtio_read(blockno, buffer) == 0); +} + static inline bool block_size_ok(uint32_t block_size) { return block_size == virtio_get_block_size(); diff --git a/pc-bios/s390-ccw/iplb.h b/pc-bios/s390-ccw/iplb.h index 16643f5879..3c29d23375 100644 --- a/pc-bios/s390-ccw/iplb.h +++ b/pc-bios/s390-ccw/iplb.h @@ -49,4 +49,28 @@ static inline bool set_iplb(IplParameterBlock *iplb) return manage_iplb(iplb, false); } +/* + * The IPL started on the device, but failed in some way. If the IPLB chain + * still has more devices left to try, use the next device in order. Set the + * next IPLB and save the current qipl parameters state. 
+ */ +static inline bool load_next_iplb(void) +{ +IplParameterBlock *next_iplb; + +if (qipl.num_iplbs < 1) { +return false; +} + +next_iplb = (IplParameterBlock *) qipl.next_iplb; +memcpy(&iplb, next_iplb, sizeof(IplParameterBlock)); +set_iplb(&iplb); + +qipl.num_iplbs--; +qipl.next_iplb = qipl.next_iplb + sizeof(IplParameterBlock); +memcpy((QemuIplParameters *)QIPL_ADDRESS, &qipl, sizeof(QemuIplParameters)); + +return true; +} + #endif /* IPLB_H */ diff --git a/pc-bios/s390-ccw/bootmap.c b/pc-bios/s390-ccw/bootmap.c index a2137449dc..69391557fa 100644 --- a/pc-bios/s390-ccw/bootmap.c +++ b/pc-bios/s390-ccw/bootmap.c @@ -144,7 +144,10 @@ static block_number_t load_eckd_segments(block_number_t blk, bool ldipl, bool more_data; memset(_bprs, FREE_SPACE_FILLER, sizeof(_bprs)); -read_block(blk, bprs, "BPRS read failed"); +if (!read_block_nonfatal(blk, bprs)) { +IPL_assert(ldipl, "BPRS read failed"); +return -1; +} do { more_data = false; @@ -188,7 +191,10 @@ static block_number_t load_eckd_segments(block_number_t blk, bool ldipl, * I.e. the next ptr must point to the unused memory area */ memset(_bprs, FREE_SPACE_FILLER, sizeof(_bprs)); -read_block(block_nr, bprs, "BPRS continuation read failed"); +if (!read_block_nonfatal(block_nr, bprs)) { +IPL_assert(ldipl, "BPRS continuation read failed"); +break; +} more_data = true; break; } @@ -197,7 +203,10 @@ static block_number_t load_eckd_segments(block_number_t blk, bool ldipl, * to memory (address). 
*/ rc = virtio_read_many(block_nr, (void *)(*address), count + 1); -IPL_assert(rc == 0, "code chunk read failed"); +if (rc != 0) { +IPL_assert(ldipl, "code chunk read failed"); +break; +} *address += (count + 1) * virtio_get_block_size(); } @@ -295,13 +304,22 @@ static void run_eckd_boot_script(block_number_t bmt_block_nr, " maximum number of boot entries allowed"); memset(sec, FREE_SPACE_FILLER, sizeof(sec)); -read_block(bmt_block_nr, sec, "Cannot read Boot Map Table"); +if (!read_block_nonfatal(bmt_block_nr, sec)) { +IPL_assert(ldipl, "Cannot read Boot Map Table"); +return; +} block_nr = gen_eckd_block_num(&bmt->entry[loadparm].xeckd, ldipl); -IPL_assert(block_nr != -1, "Cannot find Boot Map Table Entry"); +if (block_nr == -1) { +IPL_assert(ldipl, "Cannot find Boot Map Table Entry"); +return; +} memset(sec, FREE_SPACE_FILLER, sizeof(sec)); -read_block(block_nr, sec, "Cannot read Boot Map Script"); +if (!read_block_nonfatal(block_nr, sec)) { +IPL_assert(ldipl, "Cannot read Boot Map Script"); +return; +} for (i = 0; bms->entry[i].type == BOOT_SCRIPT_LOAD || bms->entry[i].type == BOOT_SCRIPT_SIGNATURE; i++) { @@ -319,13 +337,10 @@ static void run_eckd_boot_script(block_number_t bmt_block_nr, } while (block_nr != -1); } -if (ldipl && bms->entry[i].type != BOOT_SCRIPT_EXEC) { -/*