On 11/02/10 15:56, Marco Peereboom wrote: > Got tons of great test results, but still no independent RAID 1 > rebuild test. I really want to commit this but I won't until someone else > besides me tests this. > > On Thu, Oct 21, 2010 at 02:17:58PM -0500, Marco Peereboom wrote:
Sun E250 (sparc64), built softraid RAID1 pair, installed kernel with this patch, removed one drive from the RAID set, replaced drive with a different drive, rebuilt RAID1 to that new drive. No issues seen. Nick. >> anyone? >> >> On Wed, Oct 20, 2010 at 08:47:00PM -0500, Marco Peereboom wrote: >> > On Thu, Sep 30, 2010 at 03:35:33AM +0200, Tobias Ulmer wrote: >> > > I got this after a while: >> > > >> > > panic: softraid0: sr_crypto_finish_io >> > > >> > > No serial, so there's no more info. You know where to find me >> > >> > new diff that should fix all them issues. >> > >> > please test, especially raid 1 including rebuild and stuff. >> > >> > Index: dev/softraid.c >> > =================================================================== >> > RCS file: /cvs/src/sys/dev/softraid.c,v >> > retrieving revision 1.215 >> > diff -u -p -r1.215 softraid.c >> > --- dev/softraid.c 12 Oct 2010 00:53:32 -0000 1.215 >> > +++ dev/softraid.c 21 Oct 2010 01:36:32 -0000 >> > @@ -126,6 +126,7 @@ void sr_rebuild(void *); >> > void sr_rebuild_thread(void *); >> > void sr_roam_chunks(struct sr_discipline *); >> > int sr_chunk_in_use(struct sr_softc *, dev_t); >> > +void sr_startwu_callback(void *, void *); >> > >> > /* don't include these on RAMDISK */ >> > #ifndef SMALL_KERNEL >> > @@ -1806,6 +1807,8 @@ sr_wu_put(struct sr_workunit *wu) >> > wu->swu_fake = 0; >> > wu->swu_flags = 0; >> > >> > + if (wu->swu_cb_active == 1) >> > + panic("%s: sr_wu_put", DEVNAME(sd->sd_sc)); >> > while ((ccb = TAILQ_FIRST(&wu->swu_ccb)) != NULL) { >> > TAILQ_REMOVE(&wu->swu_ccb, ccb, ccb_link); >> > sr_ccb_put(ccb); >> > @@ -2563,6 +2566,9 @@ sr_hotspare_rebuild(struct sr_discipline >> > busy = 0; >> > >> > s = splbio(); >> > + if (wu->swu_cb_active == 1) >> > + panic("%s: sr_hotspare_rebuild", >> > + DEVNAME(sd->sd_sc)); >> > TAILQ_FOREACH(wu, &sd->sd_wu_pendq, swu_link) { >> > TAILQ_FOREACH(ccb, &wu->swu_ccb, ccb_link) { >> > if (ccb->ccb_target == chunk_no) >> > @@ -2816,6 +2822,11 @@ 
sr_ioctl_createraid(struct sr_softc *sc, >> > sd = malloc(sizeof(struct sr_discipline), M_DEVBUF, M_WAITOK | M_ZERO); >> > sd->sd_sc = sc; >> > SLIST_INIT(&sd->sd_meta_opt); >> > + sd->sd_workq = workq_create("srdis", 1, IPL_BIO); >> > + if (sd->sd_workq == NULL) { >> > + printf("%s: could not create workq\n"); >> > + goto unwind; >> > + } >> > if (sr_discipline_init(sd, bc->bc_level)) { >> > printf("%s: could not initialize discipline\n", DEVNAME(sc)); >> > goto unwind; >> > @@ -3407,6 +3418,9 @@ sr_discipline_shutdown(struct sr_discipl >> > >> > sr_chunks_unwind(sc, &sd->sd_vol.sv_chunk_list); >> > >> > + if (sd->sd_workq) >> > + workq_destroy(sd->sd_workq); >> > + >> > if (sd) >> > sr_discipline_free(sd); >> > >> > @@ -3625,10 +3639,29 @@ sr_raid_sync(struct sr_workunit *wu) >> > } >> > >> > void >> > +sr_startwu_callback(void *arg1, void *arg2) >> > +{ >> > + struct sr_discipline *sd = arg1; >> > + struct sr_workunit *wu = arg2; >> > + struct sr_ccb *ccb; >> > + int s; >> > + >> > + s = splbio(); >> > + if (wu->swu_cb_active == 1) >> > + panic("%s: sr_startwu_callback", DEVNAME(sd->sd_sc)); >> > + wu->swu_cb_active = 1; >> > + >> > + TAILQ_FOREACH(ccb, &wu->swu_ccb, ccb_link) >> > + VOP_STRATEGY(&ccb->ccb_buf); >> > + >> > + wu->swu_cb_active = 0; >> > + splx(s); >> > +} >> > + >> > +void >> > sr_raid_startwu(struct sr_workunit *wu) >> > { >> > struct sr_discipline *sd = wu->swu_dis; >> > - struct sr_ccb *ccb; >> > >> > splassert(IPL_BIO); >> > >> > @@ -3643,9 +3676,8 @@ sr_raid_startwu(struct sr_workunit *wu) >> > TAILQ_INSERT_TAIL(&sd->sd_wu_pendq, wu, swu_link); >> > >> > /* start all individual ios */ >> > - TAILQ_FOREACH(ccb, &wu->swu_ccb, ccb_link) { >> > - VOP_STRATEGY(&ccb->ccb_buf); >> > - } >> > + workq_queue_task(sd->sd_workq, &wu->swu_wqt, 0, sr_startwu_callback, >> > + sd, wu); >> > } >> > >> > void >> > Index: dev/softraid_crypto.c >> > =================================================================== >> > RCS file: 
/cvs/src/sys/dev/softraid_crypto.c,v >> > retrieving revision 1.57 >> > diff -u -p -r1.57 softraid_crypto.c >> > --- dev/softraid_crypto.c 27 Sep 2010 19:49:43 -0000 1.57 >> > +++ dev/softraid_crypto.c 5 Oct 2010 20:49:24 -0000 >> > @@ -164,11 +164,11 @@ sr_crypto_create(struct sr_discipline *s >> > >> > } else if (sr_crypto_get_kdf(bc, sd)) >> > goto done; >> > - >> > + >> > /* Passphrase volumes cannot be automatically assembled. */ >> > if (!(bc->bc_flags & BIOC_SCNOAUTOASSEMBLE) && bc->bc_key_disk == NODEV) >> > goto done; >> > - >> > + >> > strlcpy(sd->sd_name, "CRYPTO", sizeof(sd->sd_name)); >> > sd->sd_meta->ssdi.ssd_size = coerced_size; >> > >> > @@ -194,15 +194,12 @@ sr_crypto_assemble(struct sr_discipline >> > goto done; >> > >> > if (bc->bc_key_disk != NODEV) { >> > - >> > /* Read the mask key from the key disk. */ >> > sd->mds.mdd_crypto.key_disk = >> > sr_crypto_read_key_disk(sd, bc->bc_key_disk); >> > if (sd->mds.mdd_crypto.key_disk == NULL) >> > goto done; >> > - >> > } else if (bc->bc_opaque_flags & BIOC_SOOUT) { >> > - >> > /* provide userland with kdf hint */ >> > if (bc->bc_opaque == NULL) >> > goto done; >> > @@ -218,10 +215,8 @@ sr_crypto_assemble(struct sr_discipline >> > /* we're done */ >> > bc->bc_opaque_status = BIOC_SOINOUT_OK; >> > rv = EAGAIN; >> > - goto done; >> > - >> > + goto done; >> > } else if (bc->bc_opaque_flags & BIOC_SOIN) { >> > - >> > /* get kdf with maskkey from userland */ >> > if (sr_crypto_get_kdf(bc, sd)) >> > goto done; >> > @@ -253,10 +248,11 @@ sr_crypto_getcryptop(struct sr_workunit >> > s = splbio(); >> > uio = pool_get(&sd->mds.mdd_crypto.sr_uiopl, PR_ZERO | PR_NOWAIT); >> > if (uio == NULL) >> > - goto unwind; >> > - uio->uio_iov = pool_get(&sd->mds.mdd_crypto.sr_iovpl, PR_NOWAIT); >> > + goto poolunwind; >> > + uio->uio_iov = pool_get(&sd->mds.mdd_crypto.sr_iovpl, >> > + PR_ZERO | PR_NOWAIT); >> > if (uio->uio_iov == NULL) >> > - goto unwind; >> > + goto poolunwind; >> > splx(s); >> > >> > uio->uio_iovcnt = 1; 
>> > @@ -318,11 +314,14 @@ sr_crypto_getcryptop(struct sr_workunit >> > } >> > >> > return (crp); >> > +poolunwind: >> > + splx(s); >> > unwind: >> > if (crp) >> > crypto_freereq(crp); >> > if (uio && uio->uio_iov) >> > - if (wu->swu_xs->flags & SCSI_DATA_OUT) >> > + if ((wu->swu_xs->flags & SCSI_DATA_OUT) && >> > + uio->uio_iov->iov_base) >> > free(uio->uio_iov->iov_base, M_DEVBUF); >> > >> > s = splbio(); >> > @@ -346,7 +345,7 @@ sr_crypto_putcryptop(struct cryptop *crp >> > DNPRINTF(SR_D_DIS, "%s: sr_crypto_putcryptop crp: %p\n", >> > DEVNAME(wu->swu_dis->sd_sc), crp); >> > >> > - if (wu->swu_xs->flags & SCSI_DATA_OUT) >> > + if ((wu->swu_xs->flags & SCSI_DATA_OUT) && uio->uio_iov->iov_base) >> > free(uio->uio_iov->iov_base, M_DEVBUF); >> > s = splbio(); >> > pool_put(&sd->mds.mdd_crypto.sr_iovpl, uio->uio_iov); >> > @@ -783,7 +782,7 @@ sr_crypto_create_key_disk(struct sr_disc >> > DEVNAME(sc), devname); >> > goto fail; >> > } >> > - >> > + >> > goto done; >> > >> > fail: >> > @@ -867,13 +866,7 @@ sr_crypto_read_key_disk(struct sr_discip >> > /* >> > * Read and validate key disk metadata. >> > */ >> > - sm = malloc(SR_META_SIZE * 512, M_DEVBUF, M_NOWAIT|M_ZERO); >> > - if (sm == NULL) { >> > - printf("%s: not enough memory for metadata buffer\n", >> > - DEVNAME(sc)); >> > - goto done; >> > - } >> > - >> > + sm = malloc(SR_META_SIZE * 512, M_DEVBUF, M_WAITOK | M_ZERO); >> > if (sr_meta_native_read(sd, dev, sm, NULL)) { >> > printf("%s: native bootprobe could not read native " >> > "metadata\n", DEVNAME(sc)); >> > @@ -882,7 +875,7 @@ sr_crypto_read_key_disk(struct sr_discip >> > >> > if (sr_meta_validate(sd, dev, sm, NULL)) { >> > DNPRINTF(SR_D_META, "%s: invalid metadata\n", >> > - DEVNAME(sc)); >> > + DEVNAME(sc)); >> > goto done; >> > } >> > >> > @@ -893,13 +886,7 @@ sr_crypto_read_key_disk(struct sr_discip >> > } >> > >> > /* Construct key disk chunk. 
*/ >> > - key_disk = malloc(sizeof(struct sr_chunk), M_DEVBUF, M_NOWAIT|M_ZERO); >> > - if (key_disk == NULL) { >> > - printf("%s: not enough memory for chunk\n", >> > - DEVNAME(sc)); >> > - goto done; >> > - } >> > - >> > + key_disk = malloc(sizeof(struct sr_chunk), M_DEVBUF, M_WAITOK | M_ZERO); >> > key_disk->src_dev_mm = dev; >> > key_disk->src_vn = vn; >> > key_disk->src_size = 0; >> > @@ -1106,8 +1093,8 @@ sr_crypto_meta_opt_load(struct sr_discip >> > sd->mds.mdd_crypto.scr_meta = &om->somi.som_meta.smm_crypto; >> > rv = 0; >> > } >> > - >> > - return rv; >> > + >> > + return (rv); >> > } >> > >> > int >> > @@ -1122,7 +1109,7 @@ sr_crypto_rw(struct sr_workunit *wu) >> > if (wu->swu_xs->flags & SCSI_DATA_OUT) { >> > crp = sr_crypto_getcryptop(wu, 1); >> > if (crp == NULL) >> > - panic("sr_crypto_getcryptop"); >> > + panic("sr_crypto_rw: no crypto op"); >> > crp->crp_callback = sr_crypto_write; >> > crp->crp_opaque = wu; >> > s = splvm(); >> > @@ -1211,6 +1198,8 @@ sr_crypto_rw2(struct sr_workunit *wu, st >> > >> > LIST_INIT(&ccb->ccb_buf.b_dep); >> > >> > + if (wu->swu_cb_active == 1) >> > + panic("%s: sr_crypto_rw2", DEVNAME(sd->sd_sc)); >> > TAILQ_INSERT_TAIL(&wu->swu_ccb, ccb, ccb_link); >> > >> > DNPRINTF(SR_D_DIS, "%s: %s: sr_crypto_rw2: b_bcount: %d " >> > @@ -1305,7 +1294,7 @@ sr_crypto_intr(struct buf *bp) >> > if ((xs->flags & SCSI_DATA_IN) && (xs->error == XS_NOERROR)) { >> > crp = sr_crypto_getcryptop(wu, 0); >> > if (crp == NULL) >> > - panic("sr_crypto_getcryptop"); >> > + panic("sr_crypto_intr: no crypto op"); >> > ccb->ccb_opaque = crp; >> > crp->crp_callback = sr_crypto_read; >> > crp->crp_opaque = wu; >> > @@ -1341,6 +1330,8 @@ sr_crypto_finish_io(struct sr_workunit * >> > >> > xs->resid = 0; >> > >> > + if (wu->swu_cb_active == 1) >> > + panic("%s: sr_crypto_finish_io", DEVNAME(sd->sd_sc)); >> > TAILQ_FOREACH(ccb, &wu->swu_ccb, ccb_link) { >> > if (ccb->ccb_opaque == NULL) >> > continue; >> > Index: dev/softraid_raid1.c >> > 
=================================================================== >> > RCS file: /cvs/src/sys/dev/softraid_raid1.c,v >> > retrieving revision 1.25 >> > diff -u -p -r1.25 softraid_raid1.c >> > --- dev/softraid_raid1.c 2 Jul 2010 09:20:26 -0000 1.25 >> > +++ dev/softraid_raid1.c 27 Sep 2010 22:13:45 -0000 >> > @@ -476,6 +476,8 @@ ragain: >> > >> > LIST_INIT(&b->b_dep); >> > >> > + if (wu->swu_cb_active == 1) >> > + panic("%s: sr_raid1_rw", DEVNAME(sd->sd_sc)); >> > TAILQ_INSERT_TAIL(&wu->swu_ccb, ccb, ccb_link); >> > >> > DNPRINTF(SR_D_DIS, "%s: %s: sr_raid1: b_bcount: %d " >> > @@ -560,6 +562,9 @@ sr_raid1_intr(struct buf *bp) >> > printf("%s: retrying read on block %lld\n", >> > DEVNAME(sc), b->b_blkno); >> > sr_ccb_put(ccb); >> > + if (wu->swu_cb_active == 1) >> > + panic("%s: sr_raid1_intr_cb", >> > + DEVNAME(sd->sd_sc)); >> > TAILQ_INIT(&wu->swu_ccb); >> > wu->swu_state = SR_WU_RESTART; >> > if (sd->sd_scsi_rw(wu)) >> > @@ -647,6 +652,8 @@ sr_raid1_recreate_wu(struct sr_workunit >> > DNPRINTF(SR_D_INTR, "%s: sr_raid1_recreate_wu: %p\n", wup); >> > >> > /* toss all ccbs */ >> > + if (wu->swu_cb_active == 1) >> > + panic("%s: sr_raid1_recreate_wu", DEVNAME(sd->sd_sc)); >> > while ((ccb = TAILQ_FIRST(&wup->swu_ccb)) != NULL) { >> > TAILQ_REMOVE(&wup->swu_ccb, ccb, ccb_link); >> > sr_ccb_put(ccb); >> > Index: dev/softraidvar.h >> > =================================================================== >> > RCS file: /cvs/src/sys/dev/softraidvar.h,v >> > retrieving revision 1.95 >> > diff -u -p -r1.95 softraidvar.h >> > --- dev/softraidvar.h 30 Aug 2010 17:32:40 -0000 1.95 >> > +++ dev/softraidvar.h 26 Sep 2010 14:37:20 -0000 >> > @@ -346,6 +346,11 @@ struct sr_workunit { >> > /* all ios that make up this workunit */ >> > struct sr_ccb_list swu_ccb; >> > >> > + /* task memory */ >> > + struct workq_task swu_wqt; >> > + struct workq_task swu_intr; >> > + int swu_cb_active; /* in callback */ >> > + >> > TAILQ_ENTRY(sr_workunit) swu_link; >> > }; >> > >> > @@ -484,6 
+489,8 @@ struct sr_discipline { >> > #endif /* AOE */ >> > } sd_dis_specific;/* dis specific members */ >> > #define mds sd_dis_specific >> > + >> > + struct workq *sd_workq; >> > >> > /* discipline metadata */ >> > struct sr_metadata *sd_meta; /* in memory copy of metadata */