Module Name: src Committed By: snj Date: Tue Dec 2 22:05:14 UTC 2014
Modified Files: src/sys/dev/raidframe [netbsd-6]: raidframevar.h rf_netbsdkintf.c rf_reconstruct.c Log Message: Pull up following revision(s) (requested by oster in ticket #1194): sys/dev/raidframe/raidframevar.h: revision 1.17 sys/dev/raidframe/rf_netbsdkintf.c: revision 1.316 sys/dev/raidframe/rf_reconstruct.c: revision 1.121 Fix a long-standing bug related to rebooting while a reconstruct-to-spare is underway but not yet complete. The issue was that a component was being marked as a used_spare when the rebuild started, not when the rebuild was actually finished. Marking it as a used_spare meant that the component label on the spare was being updated such that after a reboot the component would be considered up to date, regardless of whether the rebuild actually completed! This fix includes: 1) Add an additional state "rf_ds_rebuilding_spare", which is used to denote that a spare is currently being rebuilt from the live components. 2) Update the comments on the disk states, which were out of sync with reality. 3) When rebuilding to a spare component, that spare now enters the state rf_ds_rebuilding_spare instead of the state rf_ds_used_spare. 4) When the rebuild is actually complete, the spare component enters the rf_ds_used_spare state. rf_ds_used_spare is now used exclusively for the case where the rebuilding to the spare has completed successfully. XXX: Someday we need to teach raidctl(8) about this new state, and take out the backwards-compatibility code in rf_netbsdkintf.c (see RAIDFRAME_GET_INFO in raidioctl()). For today, this fix needs to be generic enough that it can get backported without major grief. XXX: Needs pullup to netbsd-5*, netbsd-6*, and netbsd-7 Fixes PR#49244. 
To generate a diff of this commit: cvs rdiff -u -r1.15 -r1.15.10.1 src/sys/dev/raidframe/raidframevar.h cvs rdiff -u -r1.295.6.2 -r1.295.6.3 src/sys/dev/raidframe/rf_netbsdkintf.c cvs rdiff -u -r1.117.8.1 -r1.117.8.2 src/sys/dev/raidframe/rf_reconstruct.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/dev/raidframe/raidframevar.h diff -u src/sys/dev/raidframe/raidframevar.h:1.15 src/sys/dev/raidframe/raidframevar.h:1.15.10.1 --- src/sys/dev/raidframe/raidframevar.h:1.15 Sat Feb 19 07:11:09 2011 +++ src/sys/dev/raidframe/raidframevar.h Tue Dec 2 22:05:14 2014 @@ -1,4 +1,4 @@ -/* $NetBSD: raidframevar.h,v 1.15 2011/02/19 07:11:09 enami Exp $ */ +/* $NetBSD: raidframevar.h,v 1.15.10.1 2014/12/02 22:05:14 snj Exp $ */ /*- * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc. * All rights reserved. @@ -384,17 +384,17 @@ struct RF_SparetWait_s { * IF YOU ADD A STATE, CHECK TO SEE IF YOU NEED TO MODIFY RF_DEAD_DISK(). */ enum RF_DiskStatus_e { - rf_ds_optimal, /* no problems */ - rf_ds_failed, /* reconstruction ongoing */ - rf_ds_reconstructing, /* reconstruction complete to spare, dead disk - * not yet replaced */ - rf_ds_dist_spared, /* reconstruction complete to distributed + rf_ds_optimal, /* no problems */ + rf_ds_failed, /* disk has failed */ + rf_ds_reconstructing, /* reconstruction ongoing */ + rf_ds_dist_spared, /* reconstruction complete to distributed * spare space, dead disk not yet replaced */ - rf_ds_spared, /* reconstruction complete to distributed - * spare space, dead disk not yet replaced */ - rf_ds_spare, /* an available spare disk */ - rf_ds_used_spare /* a spare which has been used, and hence is + rf_ds_spared, /* reconstruction complete, dead disk not + yet replaced */ + rf_ds_spare, /* an available spare disk */ + rf_ds_used_spare, /* a spare which has been used, and hence is * not available */ + rf_ds_rebuilding_spare /* a spare which is being rebuilt to */ }; typedef enum RF_DiskStatus_e RF_DiskStatus_t; Index: src/sys/dev/raidframe/rf_netbsdkintf.c diff -u src/sys/dev/raidframe/rf_netbsdkintf.c:1.295.6.2 src/sys/dev/raidframe/rf_netbsdkintf.c:1.295.6.3 --- src/sys/dev/raidframe/rf_netbsdkintf.c:1.295.6.2 Mon Aug 13 19:41:29 2012 +++ src/sys/dev/raidframe/rf_netbsdkintf.c Tue Dec 2 22:05:14 2014 @@ -1,4 
+1,4 @@ -/* $NetBSD: rf_netbsdkintf.c,v 1.295.6.2 2012/08/13 19:41:29 riz Exp $ */ +/* $NetBSD: rf_netbsdkintf.c,v 1.295.6.3 2014/12/02 22:05:14 snj Exp $ */ /*- * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc. @@ -101,7 +101,7 @@ ***********************************************************/ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.295.6.2 2012/08/13 19:41:29 riz Exp $"); +__KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.295.6.3 2014/12/02 22:05:14 snj Exp $"); #ifdef _KERNEL_OPT #include "opt_compat_netbsd.h" @@ -1468,6 +1468,10 @@ raidioctl(dev_t dev, u_long cmd, void *d } for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) { d_cfg->spares[i] = raidPtr->Disks[j]; + if (d_cfg->spares[i].status == rf_ds_rebuilding_spare) { + /* XXX: raidctl(8) expects to see this as a used spare */ + d_cfg->spares[i].status = rf_ds_used_spare; + } } retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t)); RF_Free(d_cfg, sizeof(RF_DeviceConfig_t)); Index: src/sys/dev/raidframe/rf_reconstruct.c diff -u src/sys/dev/raidframe/rf_reconstruct.c:1.117.8.1 src/sys/dev/raidframe/rf_reconstruct.c:1.117.8.2 --- src/sys/dev/raidframe/rf_reconstruct.c:1.117.8.1 Thu Feb 23 02:22:05 2012 +++ src/sys/dev/raidframe/rf_reconstruct.c Tue Dec 2 22:05:14 2014 @@ -1,4 +1,4 @@ -/* $NetBSD: rf_reconstruct.c,v 1.117.8.1 2012/02/23 02:22:05 riz Exp $ */ +/* $NetBSD: rf_reconstruct.c,v 1.117.8.2 2014/12/02 22:05:14 snj Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. 
@@ -33,7 +33,7 @@ ************************************************************/ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: rf_reconstruct.c,v 1.117.8.1 2012/02/23 02:22:05 riz Exp $"); +__KERNEL_RCSID(0, "$NetBSD: rf_reconstruct.c,v 1.117.8.2 2014/12/02 22:05:14 snj Exp $"); #include <sys/param.h> #include <sys/time.h> @@ -261,7 +261,7 @@ rf_ReconstructFailedDiskBasic(RF_Raid_t for (scol = raidPtr->numCol; scol < raidPtr->numCol + raidPtr->numSpare; scol++) { if (raidPtr->Disks[scol].status == rf_ds_spare) { spareDiskPtr = &raidPtr->Disks[scol]; - spareDiskPtr->status = rf_ds_used_spare; + spareDiskPtr->status = rf_ds_rebuilding_spare; break; } } @@ -308,6 +308,13 @@ rf_ReconstructFailedDiskBasic(RF_Raid_t /* XXX doesn't hold for RAID 6!!*/ rf_lock_mutex2(raidPtr->mutex); + /* The failed disk has already been marked as rf_ds_spared + (or rf_ds_dist_spared) in + rf_ContinueReconstructFailedDisk() + so we just update the spare disk as being a used spare + */ + + spareDiskPtr->status = rf_ds_used_spare; raidPtr->parity_good = RF_RAID_CLEAN; rf_unlock_mutex2(raidPtr->mutex); @@ -494,7 +501,7 @@ rf_ReconstructInPlace(RF_Raid_t *raidPtr rf_unlock_mutex2(raidPtr->mutex); spareDiskPtr = &raidPtr->Disks[col]; - spareDiskPtr->status = rf_ds_used_spare; + spareDiskPtr->status = rf_ds_rebuilding_spare; printf("raid%d: initiating in-place reconstruction on column %d\n", raidPtr->raidid, col);