On my T5220, LDOM guests cannot boot from softraid because ofwboot
crashes with a "Fast Data Access MMU Miss" while loading the kernel:
>> OpenBSD BOOT 1.9
ERROR: /iscsi-hba: No iscsi-network-bootpath property
sr0*
Passphrase:
Booting sr0:a/bsd
8480888@0x1000000
ERROR: Last Trap: Fast Data Access MMU Miss
I've tracked the failure down to a crash in OF_open(), called from sr_strategy().
Every OF_open() there is paired with an OF_close(), so the firmware itself seems
to leak memory or some other resource across repeated OF_open()/OF_close() calls.
Affected firmware version info:
SP firmware 3.0.12.8.a
SP firmware build number: 108523
SP firmware date: Fri Mar 11 07:19:16 PST 2016
SP filesystem version: 0.1.22
hypervisor_version = Hypervisor 1.10.7.h 2016/03/11 07:13
obp_version = OpenBoot 4.33.6.g 2016/03/11 06:05
post_version = POST 4.33.6.g 2016/03/11 06:15
status = OpenBSD running
sysfw_version = Sun System Firmware 7.4.10.a 2016/03/11 07:45
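The diff below works around this by opening a softraid disk once and caching its ihandle until the device is closed, which allows a guest to boot from softraid on this machine.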
ok?
Index: disk.h
===================================================================
RCS file: /cvs/src/sys/arch/sparc64/stand/ofwboot/disk.h,v
retrieving revision 1.1
diff -u -p -r1.1 disk.h
--- disk.h 26 Nov 2014 20:30:41 -0000 1.1
+++ disk.h 4 Mar 2018 15:56:59 -0000
@@ -37,6 +37,9 @@ struct diskinfo {
char path[256];
struct disklabel disklabel;
struct sr_boot_volume *sr_vol;
+ int ihandle;
+ int flags;
+#define DISKINFO_FLAG_OPEN 0x1
TAILQ_ENTRY(diskinfo) list;
};
Index: ofdev.c
===================================================================
RCS file: /cvs/src/sys/arch/sparc64/stand/ofwboot/ofdev.c,v
retrieving revision 1.25
diff -u -p -r1.25 ofdev.c
--- ofdev.c 1 Oct 2015 16:08:20 -0000 1.25
+++ ofdev.c 4 Mar 2018 15:56:59 -0000
@@ -165,6 +165,7 @@ devclose(struct open_file *of)
#endif
#ifdef SOFTRAID
if (op->type == OFDEV_SOFTRAID) {
+ sr_close_volume(bootdev_dip->sr_vol);
op->handle = -1;
return 0;
}
Index: softraid_sparc64.c
===================================================================
RCS file: /cvs/src/sys/arch/sparc64/stand/ofwboot/softraid_sparc64.c,v
retrieving revision 1.2
diff -u -p -r1.2 softraid_sparc64.c
--- softraid_sparc64.c 11 Sep 2016 17:53:26 -0000 1.2
+++ softraid_sparc64.c 4 Mar 2018 15:56:59 -0000
@@ -306,6 +306,22 @@ srprobe(void)
free(md, SR_META_SIZE * DEV_BSIZE);
}
+/*
+ * Open dip->path once and cache the ihandle, since repeated OF_open/OF_close
+ * calls appear to leak memory in firmware.  Returns the ihandle, -1 on failure.
+ */
+int
+sr_open_disk(struct diskinfo *dip)
+{
+ if ((dip->flags & DISKINFO_FLAG_OPEN) == 0) {
+ dip->ihandle = OF_open(dip->path);
+ if (dip->ihandle != -1)
+ dip->flags |= DISKINFO_FLAG_OPEN;
+ }
+
+ return dip->ihandle;
+}
+
int
sr_strategy(struct sr_boot_volume *bv, int rw, daddr32_t blk, size_t size,
void *buf, size_t *rsize)
@@ -347,7 +363,7 @@ sr_strategy(struct sr_boot_volume *bv, i
blk += bv->sbv_data_blkno;
/* XXX - If I/O failed we should try another chunk... */
- ihandle = OF_open(dip->path);
+ ihandle = sr_open_disk(dip);
if (ihandle == -1)
return EIO;
bzero(&ofdev, sizeof(ofdev));
@@ -356,7 +372,6 @@ sr_strategy(struct sr_boot_volume *bv, i
ofdev.bsize = DEV_BSIZE;
ofdev.partoff = DL_GETPOFFSET(pp);
err = strategy(&ofdev, rw, blk, size, buf, rsize);
- OF_close(ihandle);
return err;
} else if (bv->sbv_level == 'C') {
@@ -371,7 +386,7 @@ sr_strategy(struct sr_boot_volume *bv, i
dip = (struct diskinfo *)bc->sbc_diskinfo;
pp = &dip->disklabel.d_partitions[bc->sbc_part - 'a'];
- ihandle = OF_open(dip->path);
+ ihandle = sr_open_disk(dip);
if (ihandle == -1)
return EIO;
bzero(&ofdev, sizeof(ofdev));
@@ -395,7 +410,6 @@ sr_strategy(struct sr_boot_volume *bv, i
printf("Read from crypto volume failed "
"(read %d bytes): %s\n", *rsize,
strerror(err));
- OF_close(ihandle);
return err;
}
bcopy(&blkno, iv, sizeof(blkno));
@@ -404,11 +418,30 @@ sr_strategy(struct sr_boot_volume *bv, i
aes_xts_decrypt(&ctx, bp + j);
}
*rsize = nsect * DEV_BSIZE;
- OF_close(ihandle);
return err;
} else
return ENOTSUP;
+}
+
+void
+sr_close_volume(struct sr_boot_volume *bv)
+{
+ struct sr_boot_chunk *bc;
+ struct diskinfo *dip;
+
+ /* Select first online chunk; NOTE(review): confirm this is the one opened. */
+ SLIST_FOREACH(bc, &bv->sbv_chunks, sbc_link)
+ if (bc->sbc_state == BIOC_SDONLINE)
+ break;
+ if (bc == NULL) /* no online chunk: nothing to close */
+ return;
+
+ dip = (struct diskinfo *)bc->sbc_diskinfo;
+ if (dip && (dip->flags & DISKINFO_FLAG_OPEN)) {
+ OF_close(dip->ihandle); /* release the ihandle cached by sr_open_disk() */
+ dip->flags &= ~DISKINFO_FLAG_OPEN; /* next sr_open_disk() reopens */
+ }
}
const char *
Index: softraid_sparc64.h
===================================================================
RCS file: /cvs/src/sys/arch/sparc64/stand/ofwboot/softraid_sparc64.h,v
retrieving revision 1.3
diff -u -p -r1.3 softraid_sparc64.h
--- softraid_sparc64.h 11 Sep 2016 17:53:26 -0000 1.3
+++ softraid_sparc64.h 4 Mar 2018 15:56:59 -0000
@@ -24,5 +24,6 @@ void srprobe(void);
const char *sr_getdisklabel(struct sr_boot_volume *, struct disklabel *);
int sr_strategy(struct sr_boot_volume *, int, daddr32_t, size_t,
void *, size_t *);
+void sr_close_volume(struct sr_boot_volume *);
#endif /* _SOFTRAID_SPARC64_H */