Module Name:    src
Committed By:   riz
Date:           Thu Feb 23 02:22:05 UTC 2012

Modified Files:
        src/sys/dev/raidframe [netbsd-6]: rf_reconmap.c rf_reconstruct.c

Log Message:
Pull up following revision(s) (requested by oster in ticket #23):
        sys/dev/raidframe/rf_reconstruct.c: revision 1.118
        sys/dev/raidframe/rf_reconmap.c: revision 1.34
Add an explanatory comment to, and effectively remove (by wrapping it
in #if 0), a DIAGNOSTIC check that is invalid for RAID5_RS.
Add logic to the main reconstruction loop to handle RAID5 with rotated
spares.  While here, observe that we were actually doing one more
stripe than we thought we were, and correct that too (it didn't matter
for non-RAID5_RS, but it definitely does for RAID5_RS).  Add some
bounds-checking at the beginning to handle the case where the number
of stripes in the set is smaller than the sliding reconstruction window.
XXX: the accounting for writes that are never issued likely needs to
be fixed for PARITY_DECLUSTERING too.


To generate a diff of this commit:
cvs rdiff -u -r1.33 -r1.33.8.1 src/sys/dev/raidframe/rf_reconmap.c
cvs rdiff -u -r1.117 -r1.117.8.1 src/sys/dev/raidframe/rf_reconstruct.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/dev/raidframe/rf_reconmap.c
diff -u src/sys/dev/raidframe/rf_reconmap.c:1.33 src/sys/dev/raidframe/rf_reconmap.c:1.33.8.1
--- src/sys/dev/raidframe/rf_reconmap.c:1.33	Wed Aug 31 18:31:02 2011
+++ src/sys/dev/raidframe/rf_reconmap.c	Thu Feb 23 02:22:05 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: rf_reconmap.c,v 1.33 2011/08/31 18:31:02 plunky Exp $	*/
+/*	$NetBSD: rf_reconmap.c,v 1.33.8.1 2012/02/23 02:22:05 riz Exp $	*/
 /*
  * Copyright (c) 1995 Carnegie-Mellon University.
  * All rights reserved.
@@ -34,7 +34,7 @@
  *************************************************************************/
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: rf_reconmap.c,v 1.33 2011/08/31 18:31:02 plunky Exp $");
+__KERNEL_RCSID(0, "$NetBSD: rf_reconmap.c,v 1.33.8.1 2012/02/23 02:22:05 riz Exp $");
 
 #include "rf_raid.h"
 #include <sys/time.h>
@@ -157,7 +157,14 @@ rf_ReconMapUpdate(RF_Raid_t *raidPtr, RF
 
 		/* do we need to move the queue? */
 		while (i > mapPtr->high_ru) {
+#if 0
 #ifdef DIAGNOSTIC
+			/* XXX: The check below is not valid for
+			 * RAID5_RS.  It is valid for RAID 1 and RAID 5.
+			 * The issue is that we can easily have
+			 * RU_NOTHING entries here too, and those are
+			 * quite correct.
+			 */
 			if (mapPtr->status[mapPtr->head]!=RU_ALL) {
 				printf("\nraid%d: reconmap incorrect -- working on i %" PRIu64 "\n",
 				       raidPtr->raidid, i);
@@ -170,6 +177,7 @@ rf_ReconMapUpdate(RF_Raid_t *raidPtr, RF
 				panic("reconmap incorrect");
 			} 
 #endif
+#endif
 			mapPtr->low_ru++;
 			mapPtr->high_ru++;
 			/* initialize "highest" RU status entry, which

Index: src/sys/dev/raidframe/rf_reconstruct.c
diff -u src/sys/dev/raidframe/rf_reconstruct.c:1.117 src/sys/dev/raidframe/rf_reconstruct.c:1.117.8.1
--- src/sys/dev/raidframe/rf_reconstruct.c:1.117	Fri Oct 14 09:23:30 2011
+++ src/sys/dev/raidframe/rf_reconstruct.c	Thu Feb 23 02:22:05 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: rf_reconstruct.c,v 1.117 2011/10/14 09:23:30 hannken Exp $	*/
+/*	$NetBSD: rf_reconstruct.c,v 1.117.8.1 2012/02/23 02:22:05 riz Exp $	*/
 /*
  * Copyright (c) 1995 Carnegie-Mellon University.
  * All rights reserved.
@@ -33,7 +33,7 @@
  ************************************************************/
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: rf_reconstruct.c,v 1.117 2011/10/14 09:23:30 hannken Exp $");
+__KERNEL_RCSID(0, "$NetBSD: rf_reconstruct.c,v 1.117.8.1 2012/02/23 02:22:05 riz Exp $");
 
 #include <sys/param.h>
 #include <sys/time.h>
@@ -570,6 +570,9 @@ rf_ContinueReconstructFailedDisk(RF_Raid
 	RF_ReconCtrl_t *tmp_reconctrl;
 	RF_ReconEvent_t *event;
 	RF_StripeCount_t incPSID,lastPSID,num_writes,pending_writes,prev;
+#if RF_INCLUDE_RAID5_RS > 0
+	RF_StripeCount_t startPSID,endPSID,aPSID,bPSID,offPSID;
+#endif
 	RF_ReconUnitCount_t RUsPerPU;
 	struct timeval etime, elpsd;
 	unsigned long xor_s, xor_resid_us;
@@ -622,7 +625,17 @@ rf_ContinueReconstructFailedDisk(RF_Raid
 	recon_error = 0;
 	write_error = 0;
 	pending_writes = incPSID;
-	raidPtr->reconControl->lastPSID = incPSID;
+	raidPtr->reconControl->lastPSID = incPSID - 1;
+
+	/* bounds check raidPtr->reconControl->lastPSID and
+	   pending_writes so that we don't attempt to wait for more IO
+	   than can possibly happen */
+
+	if (raidPtr->reconControl->lastPSID > lastPSID)
+		raidPtr->reconControl->lastPSID = lastPSID;
+
+	if (pending_writes > lastPSID)
+		pending_writes = lastPSID;
 
 	/* start the actual reconstruction */
 
@@ -636,6 +649,49 @@ rf_ContinueReconstructFailedDisk(RF_Raid
 		}
 
 		num_writes = 0;
+
+#if RF_INCLUDE_RAID5_RS > 0
+		/* For RAID5 with Rotated Spares we will be 'short'
+		   some number of writes since no writes will get
+		   issued for stripes where the spare is on the
+		   component being rebuilt.  Account for the shortage
+		   here so that we don't hang indefinitely below
+		   waiting for writes to complete that were never
+		   scheduled.
+
+		   XXX: Should be fixed for PARITY_DECLUSTERING and
+		   others too! 
+
+		*/
+
+		if (raidPtr->Layout.numDataCol < 
+		    raidPtr->numCol - raidPtr->Layout.numParityCol) {
+			/* numDataCol is at least 2 less than numCol, so
+			   should be RAID 5 with Rotated Spares */
+
+			/* XXX need to update for RAID 6 */
+			
+			startPSID = raidPtr->reconControl->lastPSID - pending_writes + 1;
+			endPSID = raidPtr->reconControl->lastPSID;
+			
+			offPSID = raidPtr->numCol - col - 1;
+			
+			aPSID = startPSID - startPSID % raidPtr->numCol + offPSID;
+			if (aPSID < startPSID) {
+				aPSID += raidPtr->numCol;
+			}
+			
+			bPSID = endPSID - ((endPSID - offPSID) % raidPtr->numCol);
+			
+			if (aPSID < endPSID) {
+				num_writes = ((bPSID - aPSID) / raidPtr->numCol) + 1;
+			}
+			
+			if ((aPSID == endPSID) && (bPSID == endPSID)) {
+				num_writes++;
+			}
+		}
+#endif
 		
 		/* issue a read for each surviving disk */
 		
@@ -714,7 +770,7 @@ rf_ContinueReconstructFailedDisk(RF_Raid
 #endif
 		}
 
-		/* reads done, wakup any waiters, and then wait for writes */
+		/* reads done, wakeup any waiters, and then wait for writes */
 
 		rf_WakeupHeadSepCBWaiters(raidPtr);
 
@@ -1134,7 +1190,7 @@ IssueNextReadRequest(RF_Raid_t *raidPtr,
 			ctrl->ru_count = 0;
 			/* code left over from when head-sep was based on
 			 * parity stripe id */
-			if (ctrl->curPSID >= raidPtr->reconControl->lastPSID) {
+			if (ctrl->curPSID > raidPtr->reconControl->lastPSID) {
 				CheckForNewMinHeadSep(raidPtr, ++(ctrl->headSepCounter));
 				return (RF_RECON_DONE_READS);	/* finito! */
 			}

Reply via email to