Module Name:    src
Committed By:   riz
Date:           Thu Feb 23 02:22:05 UTC 2012

Modified Files:
        src/sys/dev/raidframe [netbsd-6]: rf_reconmap.c rf_reconstruct.c

Log Message:
Pull up following revision(s) (requested by oster in ticket #23):
        sys/dev/raidframe/rf_reconstruct.c: revision 1.118
        sys/dev/raidframe/rf_reconmap.c: revision 1.34

Comment out, and effectively remove, a DIAGNOSTIC check that is invalid
for RAID5_RS.

Add logic to the main reconstruction loop to handle RAID5 with rotated
spares.

While here, observe that we were actually doing one more stripe than we
thought we were, and correct that too (it didn't matter for non-RAID5_RS,
but it definitely does for RAID5_RS).

Add some bounds-checking at the beginning to handle the case where the
number of stripes in the set is smaller than the sliding reconstruction
window.

XXX: this problem likely needs to be fixed for PARITY_DECLUSTERING too.

To generate a diff of this commit:
cvs rdiff -u -r1.33 -r1.33.8.1 src/sys/dev/raidframe/rf_reconmap.c
cvs rdiff -u -r1.117 -r1.117.8.1 src/sys/dev/raidframe/rf_reconstruct.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files: Index: src/sys/dev/raidframe/rf_reconmap.c diff -u src/sys/dev/raidframe/rf_reconmap.c:1.33 src/sys/dev/raidframe/rf_reconmap.c:1.33.8.1 --- src/sys/dev/raidframe/rf_reconmap.c:1.33 Wed Aug 31 18:31:02 2011 +++ src/sys/dev/raidframe/rf_reconmap.c Thu Feb 23 02:22:05 2012 @@ -1,4 +1,4 @@ -/* $NetBSD: rf_reconmap.c,v 1.33 2011/08/31 18:31:02 plunky Exp $ */ +/* $NetBSD: rf_reconmap.c,v 1.33.8.1 2012/02/23 02:22:05 riz Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -34,7 +34,7 @@ *************************************************************************/ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: rf_reconmap.c,v 1.33 2011/08/31 18:31:02 plunky Exp $"); +__KERNEL_RCSID(0, "$NetBSD: rf_reconmap.c,v 1.33.8.1 2012/02/23 02:22:05 riz Exp $"); #include "rf_raid.h" #include <sys/time.h> @@ -157,7 +157,14 @@ rf_ReconMapUpdate(RF_Raid_t *raidPtr, RF /* do we need to move the queue? */ while (i > mapPtr->high_ru) { +#if 0 #ifdef DIAGNOSTIC + /* XXX: The check below is not valid for + * RAID5_RS. It is valid for RAID 1 and RAID 5. + * The issue is that we can easily have + * RU_NOTHING entries here too, and those are + * quite correct. 
+ */ if (mapPtr->status[mapPtr->head]!=RU_ALL) { printf("\nraid%d: reconmap incorrect -- working on i %" PRIu64 "\n", raidPtr->raidid, i); @@ -170,6 +177,7 @@ rf_ReconMapUpdate(RF_Raid_t *raidPtr, RF panic("reconmap incorrect"); } #endif +#endif mapPtr->low_ru++; mapPtr->high_ru++; /* initialize "highest" RU status entry, which Index: src/sys/dev/raidframe/rf_reconstruct.c diff -u src/sys/dev/raidframe/rf_reconstruct.c:1.117 src/sys/dev/raidframe/rf_reconstruct.c:1.117.8.1 --- src/sys/dev/raidframe/rf_reconstruct.c:1.117 Fri Oct 14 09:23:30 2011 +++ src/sys/dev/raidframe/rf_reconstruct.c Thu Feb 23 02:22:05 2012 @@ -1,4 +1,4 @@ -/* $NetBSD: rf_reconstruct.c,v 1.117 2011/10/14 09:23:30 hannken Exp $ */ +/* $NetBSD: rf_reconstruct.c,v 1.117.8.1 2012/02/23 02:22:05 riz Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -33,7 +33,7 @@ ************************************************************/ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: rf_reconstruct.c,v 1.117 2011/10/14 09:23:30 hannken Exp $"); +__KERNEL_RCSID(0, "$NetBSD: rf_reconstruct.c,v 1.117.8.1 2012/02/23 02:22:05 riz Exp $"); #include <sys/param.h> #include <sys/time.h> @@ -570,6 +570,9 @@ rf_ContinueReconstructFailedDisk(RF_Raid RF_ReconCtrl_t *tmp_reconctrl; RF_ReconEvent_t *event; RF_StripeCount_t incPSID,lastPSID,num_writes,pending_writes,prev; +#if RF_INCLUDE_RAID5_RS > 0 + RF_StripeCount_t startPSID,endPSID,aPSID,bPSID,offPSID; +#endif RF_ReconUnitCount_t RUsPerPU; struct timeval etime, elpsd; unsigned long xor_s, xor_resid_us; @@ -622,7 +625,17 @@ rf_ContinueReconstructFailedDisk(RF_Raid recon_error = 0; write_error = 0; pending_writes = incPSID; - raidPtr->reconControl->lastPSID = incPSID; + raidPtr->reconControl->lastPSID = incPSID - 1; + + /* bounds check raidPtr->reconControl->lastPSID and + pending_writes so that we don't attempt to wait for more IO + than can possibly happen */ + + if (raidPtr->reconControl->lastPSID > lastPSID) + 
raidPtr->reconControl->lastPSID = lastPSID; + + if (pending_writes > lastPSID) + pending_writes = lastPSID; /* start the actual reconstruction */ @@ -636,6 +649,49 @@ rf_ContinueReconstructFailedDisk(RF_Raid } num_writes = 0; + +#if RF_INCLUDE_RAID5_RS > 0 + /* For RAID5 with Rotated Spares we will be 'short' + some number of writes since no writes will get + issued for stripes where the spare is on the + component being rebuilt. Account for the shortage + here so that we don't hang indefinitely below + waiting for writes to complete that were never + scheduled. + + XXX: Should be fixed for PARITY_DECLUSTERING and + others too! + + */ + + if (raidPtr->Layout.numDataCol < + raidPtr->numCol - raidPtr->Layout.numParityCol) { + /* numDataCol is at least 2 less than numCol, so + should be RAID 5 with Rotated Spares */ + + /* XXX need to update for RAID 6 */ + + startPSID = raidPtr->reconControl->lastPSID - pending_writes + 1; + endPSID = raidPtr->reconControl->lastPSID; + + offPSID = raidPtr->numCol - col - 1; + + aPSID = startPSID - startPSID % raidPtr->numCol + offPSID; + if (aPSID < startPSID) { + aPSID += raidPtr->numCol; + } + + bPSID = endPSID - ((endPSID - offPSID) % raidPtr->numCol); + + if (aPSID < endPSID) { + num_writes = ((bPSID - aPSID) / raidPtr->numCol) + 1; + } + + if ((aPSID == endPSID) && (bPSID == endPSID)) { + num_writes++; + } + } +#endif /* issue a read for each surviving disk */ @@ -714,7 +770,7 @@ rf_ContinueReconstructFailedDisk(RF_Raid #endif } - /* reads done, wakup any waiters, and then wait for writes */ + /* reads done, wakeup any waiters, and then wait for writes */ rf_WakeupHeadSepCBWaiters(raidPtr); @@ -1134,7 +1190,7 @@ IssueNextReadRequest(RF_Raid_t *raidPtr, ctrl->ru_count = 0; /* code left over from when head-sep was based on * parity stripe id */ - if (ctrl->curPSID >= raidPtr->reconControl->lastPSID) { + if (ctrl->curPSID > raidPtr->reconControl->lastPSID) { CheckForNewMinHeadSep(raidPtr, ++(ctrl->headSepCounter)); return 
(RF_RECON_DONE_READS); /* finito! */ }