On 11/02/10 15:56, Marco Peereboom wrote:
> Got tons of great test results but still not an independent RAID 1
> rebuild.  I really want to commit this but I won't until someone else
> besides me tests this.
> 
> On Thu, Oct 21, 2010 at 02:17:58PM -0500, Marco Peereboom wrote:

Tested on a Sun E250 (sparc64): built a softraid RAID1 pair, installed a
kernel with this patch, removed one drive from the RAID set, replaced it
with a different drive, and rebuilt the RAID1 onto the new drive.

No issues seen.

Nick.


>> anyone?
>> 
>> On Wed, Oct 20, 2010 at 08:47:00PM -0500, Marco Peereboom wrote:
>> > On Thu, Sep 30, 2010 at 03:35:33AM +0200, Tobias Ulmer wrote:
>> > > I got this after a while:
>> > > 
>> > > panic: softraid0: sr_crypto_finish_io
>> > > 
>> > > No serial, so there's no more info. You know where to find me
>> > 
>> > new diff that should fix all of those issues.
>> > 
>> > please test, especially raid 1 including rebuild and stuff.
>> > 
>> > Index: dev/softraid.c
>> > ===================================================================
>> > RCS file: /cvs/src/sys/dev/softraid.c,v
>> > retrieving revision 1.215
>> > diff -u -p -r1.215 softraid.c
>> > --- dev/softraid.c 12 Oct 2010 00:53:32 -0000      1.215
>> > +++ dev/softraid.c 21 Oct 2010 01:36:32 -0000
>> > @@ -126,6 +126,7 @@ void                   sr_rebuild(void *);
>> >  void                      sr_rebuild_thread(void *);
>> >  void                      sr_roam_chunks(struct sr_discipline *);
>> >  int                       sr_chunk_in_use(struct sr_softc *, dev_t);
>> > +void                      sr_startwu_callback(void *, void *);
>> >  
>> >  /* don't include these on RAMDISK */
>> >  #ifndef SMALL_KERNEL
>> > @@ -1806,6 +1807,8 @@ sr_wu_put(struct sr_workunit *wu)
>> >    wu->swu_fake = 0;
>> >    wu->swu_flags = 0;
>> >  
>> > +  if (wu->swu_cb_active == 1)
>> > +          panic("%s: sr_wu_put", DEVNAME(sd->sd_sc));
>> >    while ((ccb = TAILQ_FIRST(&wu->swu_ccb)) != NULL) {
>> >            TAILQ_REMOVE(&wu->swu_ccb, ccb, ccb_link);
>> >            sr_ccb_put(ccb);
>> > @@ -2563,6 +2566,9 @@ sr_hotspare_rebuild(struct sr_discipline
>> >                    busy = 0;
>> >  
>> >                    s = splbio();
>> > +                  if (wu->swu_cb_active == 1)
>> > +                          panic("%s: sr_hotspare_rebuild",
>> > +                              DEVNAME(sd->sd_sc));
>> >                    TAILQ_FOREACH(wu, &sd->sd_wu_pendq, swu_link) {
>> >                            TAILQ_FOREACH(ccb, &wu->swu_ccb, ccb_link) {
>> >                                    if (ccb->ccb_target == chunk_no)
>> > @@ -2816,6 +2822,11 @@ sr_ioctl_createraid(struct sr_softc *sc,
>> >    sd = malloc(sizeof(struct sr_discipline), M_DEVBUF, M_WAITOK | M_ZERO);
>> >    sd->sd_sc = sc;
>> >    SLIST_INIT(&sd->sd_meta_opt);
>> > +  sd->sd_workq = workq_create("srdis", 1, IPL_BIO);
>> > +  if (sd->sd_workq == NULL) {
>> > +          printf("%s: could not create workq\n");
>> > +          goto unwind;
>> > +  }
>> >    if (sr_discipline_init(sd, bc->bc_level)) {
>> >            printf("%s: could not initialize discipline\n", DEVNAME(sc));
>> >            goto unwind;
>> > @@ -3407,6 +3418,9 @@ sr_discipline_shutdown(struct sr_discipl
>> >  
>> >    sr_chunks_unwind(sc, &sd->sd_vol.sv_chunk_list);
>> >  
>> > +  if (sd->sd_workq)
>> > +          workq_destroy(sd->sd_workq);
>> > +
>> >    if (sd)
>> >            sr_discipline_free(sd);
>> >  
>> > @@ -3625,10 +3639,29 @@ sr_raid_sync(struct sr_workunit *wu)
>> >  }
>> >  
>> >  void
>> > +sr_startwu_callback(void *arg1, void *arg2)
>> > +{
>> > +  struct sr_discipline    *sd = arg1;
>> > +  struct sr_workunit      *wu = arg2;
>> > +  struct sr_ccb           *ccb;
>> > +  int                     s;
>> > +
>> > +  s = splbio();
>> > +  if (wu->swu_cb_active == 1)
>> > +          panic("%s: sr_startwu_callback", DEVNAME(sd->sd_sc));
>> > +  wu->swu_cb_active = 1;
>> > +
>> > +  TAILQ_FOREACH(ccb, &wu->swu_ccb, ccb_link)
>> > +          VOP_STRATEGY(&ccb->ccb_buf);
>> > +
>> > +  wu->swu_cb_active = 0;
>> > +  splx(s);
>> > +}
>> > +
>> > +void
>> >  sr_raid_startwu(struct sr_workunit *wu)
>> >  {
>> >    struct sr_discipline    *sd = wu->swu_dis;
>> > -  struct sr_ccb           *ccb;
>> >  
>> >    splassert(IPL_BIO);
>> >  
>> > @@ -3643,9 +3676,8 @@ sr_raid_startwu(struct sr_workunit *wu)
>> >            TAILQ_INSERT_TAIL(&sd->sd_wu_pendq, wu, swu_link);
>> >  
>> >    /* start all individual ios */
>> > -  TAILQ_FOREACH(ccb, &wu->swu_ccb, ccb_link) {
>> > -          VOP_STRATEGY(&ccb->ccb_buf);
>> > -  }
>> > +  workq_queue_task(sd->sd_workq, &wu->swu_wqt, 0, sr_startwu_callback,
>> > +      sd, wu);
>> >  }
>> >  
>> >  void
>> > Index: dev/softraid_crypto.c
>> > ===================================================================
>> > RCS file: /cvs/src/sys/dev/softraid_crypto.c,v
>> > retrieving revision 1.57
>> > diff -u -p -r1.57 softraid_crypto.c
>> > --- dev/softraid_crypto.c  27 Sep 2010 19:49:43 -0000      1.57
>> > +++ dev/softraid_crypto.c  5 Oct 2010 20:49:24 -0000
>> > @@ -164,11 +164,11 @@ sr_crypto_create(struct sr_discipline *s
>> >  
>> >    } else if (sr_crypto_get_kdf(bc, sd))
>> >            goto done;
>> > - 
>> > +
>> >    /* Passphrase volumes cannot be automatically assembled. */
>> >    if (!(bc->bc_flags & BIOC_SCNOAUTOASSEMBLE) && bc->bc_key_disk == NODEV)
>> >            goto done;
>> > - 
>> > +
>> >    strlcpy(sd->sd_name, "CRYPTO", sizeof(sd->sd_name));
>> >    sd->sd_meta->ssdi.ssd_size = coerced_size;
>> >  
>> > @@ -194,15 +194,12 @@ sr_crypto_assemble(struct sr_discipline 
>> >            goto done;
>> >  
>> >    if (bc->bc_key_disk != NODEV) {
>> > -
>> >            /* Read the mask key from the key disk. */
>> >            sd->mds.mdd_crypto.key_disk =
>> >                sr_crypto_read_key_disk(sd, bc->bc_key_disk);
>> >            if (sd->mds.mdd_crypto.key_disk == NULL)
>> >                    goto done;
>> > -
>> >    } else if (bc->bc_opaque_flags & BIOC_SOOUT) {
>> > -
>> >            /* provide userland with kdf hint */
>> >            if (bc->bc_opaque == NULL)
>> >                    goto done;
>> > @@ -218,10 +215,8 @@ sr_crypto_assemble(struct sr_discipline 
>> >            /* we're done */
>> >            bc->bc_opaque_status = BIOC_SOINOUT_OK;
>> >            rv = EAGAIN;
>> > -          goto done;
>> > -
>> > +          goto done;
>> >    } else if (bc->bc_opaque_flags & BIOC_SOIN) {
>> > -
>> >            /* get kdf with maskkey from userland */
>> >            if (sr_crypto_get_kdf(bc, sd))
>> >                    goto done;
>> > @@ -253,10 +248,11 @@ sr_crypto_getcryptop(struct sr_workunit 
>> >    s = splbio();
>> >    uio = pool_get(&sd->mds.mdd_crypto.sr_uiopl, PR_ZERO | PR_NOWAIT);
>> >    if (uio == NULL)
>> > -          goto unwind;
>> > -  uio->uio_iov = pool_get(&sd->mds.mdd_crypto.sr_iovpl, PR_NOWAIT);
>> > +          goto poolunwind;
>> > +  uio->uio_iov = pool_get(&sd->mds.mdd_crypto.sr_iovpl,
>> > +      PR_ZERO | PR_NOWAIT);
>> >    if (uio->uio_iov == NULL)
>> > -          goto unwind;
>> > +          goto poolunwind;
>> >    splx(s);
>> >  
>> >    uio->uio_iovcnt = 1;
>> > @@ -318,11 +314,14 @@ sr_crypto_getcryptop(struct sr_workunit 
>> >    }
>> >  
>> >    return (crp);
>> > +poolunwind:
>> > +  splx(s);
>> >  unwind:
>> >    if (crp)
>> >            crypto_freereq(crp);
>> >    if (uio && uio->uio_iov)
>> > -          if (wu->swu_xs->flags & SCSI_DATA_OUT)
>> > +          if ((wu->swu_xs->flags & SCSI_DATA_OUT) &&
>> > +              uio->uio_iov->iov_base)
>> >                    free(uio->uio_iov->iov_base, M_DEVBUF);
>> >  
>> >    s = splbio();
>> > @@ -346,7 +345,7 @@ sr_crypto_putcryptop(struct cryptop *crp
>> >    DNPRINTF(SR_D_DIS, "%s: sr_crypto_putcryptop crp: %p\n",
>> >        DEVNAME(wu->swu_dis->sd_sc), crp);
>> >  
>> > -  if (wu->swu_xs->flags & SCSI_DATA_OUT)
>> > +  if ((wu->swu_xs->flags & SCSI_DATA_OUT) && uio->uio_iov->iov_base)
>> >            free(uio->uio_iov->iov_base, M_DEVBUF);
>> >    s = splbio();
>> >    pool_put(&sd->mds.mdd_crypto.sr_iovpl, uio->uio_iov);
>> > @@ -783,7 +782,7 @@ sr_crypto_create_key_disk(struct sr_disc
>> >                DEVNAME(sc), devname);
>> >            goto fail;
>> >    }
>> > -  
>> > +
>> >    goto done;
>> >  
>> >  fail:
>> > @@ -867,13 +866,7 @@ sr_crypto_read_key_disk(struct sr_discip
>> >    /*
>> >     * Read and validate key disk metadata.
>> >     */
>> > -  sm = malloc(SR_META_SIZE * 512, M_DEVBUF, M_NOWAIT|M_ZERO);
>> > -  if (sm == NULL) {
>> > -          printf("%s: not enough memory for metadata buffer\n",
>> > -              DEVNAME(sc));
>> > -          goto done;
>> > -  }
>> > -
>> > +  sm = malloc(SR_META_SIZE * 512, M_DEVBUF, M_WAITOK | M_ZERO);
>> >    if (sr_meta_native_read(sd, dev, sm, NULL)) {
>> >            printf("%s: native bootprobe could not read native "
>> >                "metadata\n", DEVNAME(sc));
>> > @@ -882,7 +875,7 @@ sr_crypto_read_key_disk(struct sr_discip
>> >  
>> >    if (sr_meta_validate(sd, dev, sm, NULL)) {
>> >            DNPRINTF(SR_D_META, "%s: invalid metadata\n",
>> > -              DEVNAME(sc));
>> > +              DEVNAME(sc));
>> >            goto done;
>> >    }
>> >  
>> > @@ -893,13 +886,7 @@ sr_crypto_read_key_disk(struct sr_discip
>> >    }
>> >  
>> >    /* Construct key disk chunk. */
>> > -  key_disk = malloc(sizeof(struct sr_chunk), M_DEVBUF, M_NOWAIT|M_ZERO);
>> > -  if (key_disk == NULL) {
>> > -          printf("%s: not enough memory for chunk\n",
>> > -              DEVNAME(sc));
>> > -          goto done;
>> > -  }
>> > -
>> > +  key_disk = malloc(sizeof(struct sr_chunk), M_DEVBUF, M_WAITOK | M_ZERO);
>> >    key_disk->src_dev_mm = dev;
>> >    key_disk->src_vn = vn;
>> >    key_disk->src_size = 0;
>> > @@ -1106,8 +1093,8 @@ sr_crypto_meta_opt_load(struct sr_discip
>> >            sd->mds.mdd_crypto.scr_meta =  &om->somi.som_meta.smm_crypto;
>> >            rv = 0;
>> >    }
>> > -  
>> > -  return rv;
>> > +
>> > +  return (rv);
>> >  }
>> >  
>> >  int
>> > @@ -1122,7 +1109,7 @@ sr_crypto_rw(struct sr_workunit *wu)
>> >    if (wu->swu_xs->flags & SCSI_DATA_OUT) {
>> >            crp = sr_crypto_getcryptop(wu, 1);
>> >            if (crp == NULL)
>> > -                  panic("sr_crypto_getcryptop");
>> > +                  panic("sr_crypto_rw: no crypto op");
>> >            crp->crp_callback = sr_crypto_write;
>> >            crp->crp_opaque = wu;
>> >            s = splvm();
>> > @@ -1211,6 +1198,8 @@ sr_crypto_rw2(struct sr_workunit *wu, st
>> >  
>> >    LIST_INIT(&ccb->ccb_buf.b_dep);
>> >  
>> > +  if (wu->swu_cb_active == 1)
>> > +          panic("%s: sr_crypto_rw2", DEVNAME(sd->sd_sc));
>> >    TAILQ_INSERT_TAIL(&wu->swu_ccb, ccb, ccb_link);
>> >  
>> >    DNPRINTF(SR_D_DIS, "%s: %s: sr_crypto_rw2: b_bcount: %d "
>> > @@ -1305,7 +1294,7 @@ sr_crypto_intr(struct buf *bp)
>> >            if ((xs->flags & SCSI_DATA_IN) && (xs->error == XS_NOERROR)) {
>> >                    crp = sr_crypto_getcryptop(wu, 0);
>> >                    if (crp == NULL)
>> > -                          panic("sr_crypto_getcryptop");
>> > +                          panic("sr_crypto_intr: no crypto op");
>> >                    ccb->ccb_opaque = crp;
>> >                    crp->crp_callback = sr_crypto_read;
>> >                    crp->crp_opaque = wu;
>> > @@ -1341,6 +1330,8 @@ sr_crypto_finish_io(struct sr_workunit *
>> >  
>> >    xs->resid = 0;
>> >  
>> > +  if (wu->swu_cb_active == 1)
>> > +          panic("%s: sr_crypto_finish_io", DEVNAME(sd->sd_sc));
>> >    TAILQ_FOREACH(ccb, &wu->swu_ccb, ccb_link) {
>> >            if (ccb->ccb_opaque == NULL)
>> >                    continue;
>> > Index: dev/softraid_raid1.c
>> > ===================================================================
>> > RCS file: /cvs/src/sys/dev/softraid_raid1.c,v
>> > retrieving revision 1.25
>> > diff -u -p -r1.25 softraid_raid1.c
>> > --- dev/softraid_raid1.c   2 Jul 2010 09:20:26 -0000       1.25
>> > +++ dev/softraid_raid1.c   27 Sep 2010 22:13:45 -0000
>> > @@ -476,6 +476,8 @@ ragain:
>> >  
>> >            LIST_INIT(&b->b_dep);
>> >  
>> > +          if (wu->swu_cb_active == 1)
>> > +                  panic("%s: sr_raid1_rw", DEVNAME(sd->sd_sc));
>> >            TAILQ_INSERT_TAIL(&wu->swu_ccb, ccb, ccb_link);
>> >  
>> >            DNPRINTF(SR_D_DIS, "%s: %s: sr_raid1: b_bcount: %d "
>> > @@ -560,6 +562,9 @@ sr_raid1_intr(struct buf *bp)
>> >                            printf("%s: retrying read on block %lld\n",
>> >                                DEVNAME(sc), b->b_blkno);
>> >                            sr_ccb_put(ccb);
>> > +                          if (wu->swu_cb_active == 1)
>> > +                                  panic("%s: sr_raid1_intr_cb",
>> > +                                      DEVNAME(sd->sd_sc));
>> >                            TAILQ_INIT(&wu->swu_ccb);
>> >                            wu->swu_state = SR_WU_RESTART;
>> >                            if (sd->sd_scsi_rw(wu))
>> > @@ -647,6 +652,8 @@ sr_raid1_recreate_wu(struct sr_workunit 
>> >            DNPRINTF(SR_D_INTR, "%s: sr_raid1_recreate_wu: %p\n", wup);
>> >  
>> >            /* toss all ccbs */
>> > +          if (wu->swu_cb_active == 1)
>> > +                  panic("%s: sr_raid1_recreate_wu", DEVNAME(sd->sd_sc));
>> >            while ((ccb = TAILQ_FIRST(&wup->swu_ccb)) != NULL) {
>> >                    TAILQ_REMOVE(&wup->swu_ccb, ccb, ccb_link);
>> >                    sr_ccb_put(ccb);
>> > Index: dev/softraidvar.h
>> > ===================================================================
>> > RCS file: /cvs/src/sys/dev/softraidvar.h,v
>> > retrieving revision 1.95
>> > diff -u -p -r1.95 softraidvar.h
>> > --- dev/softraidvar.h      30 Aug 2010 17:32:40 -0000      1.95
>> > +++ dev/softraidvar.h      26 Sep 2010 14:37:20 -0000
>> > @@ -346,6 +346,11 @@ struct sr_workunit {
>> >    /* all ios that make up this workunit */
>> >    struct sr_ccb_list      swu_ccb;
>> >  
>> > +  /* task memory */
>> > +  struct workq_task       swu_wqt;
>> > +  struct workq_task       swu_intr;
>> > +  int                     swu_cb_active;  /* in callback */
>> > +
>> >    TAILQ_ENTRY(sr_workunit) swu_link;
>> >  };
>> >  
>> > @@ -484,6 +489,8 @@ struct sr_discipline {
>> >  #endif /* AOE */
>> >    }                       sd_dis_specific;/* dis specific members */
>> >  #define mds                       sd_dis_specific
>> > +
>> > +  struct workq            *sd_workq;
>> >  
>> >    /* discipline metadata */
>> >    struct sr_metadata      *sd_meta;       /* in memory copy of metadata */

Reply via email to