Module Name:    src
Committed By:   reinoud
Date:           Mon Jul  6 17:13:38 UTC 2009

Modified Files:
        src/sys/fs/udf: udf_strat_rmw.c

Log Message:
Re-implement read-modify-write backend strategy. This version is a lot more
clean locking-wise and will consume less CPU power on needless moving-around.


To generate a diff of this commit:
cvs rdiff -u -r1.20 -r1.21 src/sys/fs/udf/udf_strat_rmw.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/fs/udf/udf_strat_rmw.c
diff -u src/sys/fs/udf/udf_strat_rmw.c:1.20 src/sys/fs/udf/udf_strat_rmw.c:1.21
--- src/sys/fs/udf/udf_strat_rmw.c:1.20	Thu Jul  2 16:56:35 2009
+++ src/sys/fs/udf/udf_strat_rmw.c	Mon Jul  6 17:13:38 2009
@@ -1,4 +1,4 @@
-/* $NetBSD: udf_strat_rmw.c,v 1.20 2009/07/02 16:56:35 reinoud Exp $ */
+/* $NetBSD: udf_strat_rmw.c,v 1.21 2009/07/06 17:13:38 reinoud Exp $ */
 
 /*
  * Copyright (c) 2006, 2008 Reinoud Zandijk
@@ -28,7 +28,7 @@
 
 #include <sys/cdefs.h>
 #ifndef lint
-__KERNEL_RCSID(0, "$NetBSD: udf_strat_rmw.c,v 1.20 2009/07/02 16:56:35 reinoud Exp $");
+__KERNEL_RCSID(0, "$NetBSD: udf_strat_rmw.c,v 1.21 2009/07/06 17:13:38 reinoud Exp $");
 #endif /* not lint */
 
 
@@ -79,7 +79,7 @@
 #define UDF_SHED_READING	2
 #define UDF_SHED_WRITING	3
 #define UDF_SHED_SEQWRITING	4
-#define UDF_SHED_IDLE		5			/* resting */
+#define UDF_SHED_IDLE		5			/* refcnt'd */
 #define UDF_SHED_FREE		6			/* recycleable */
 #define UDF_SHED_MAX		6+1
 
@@ -105,6 +105,9 @@
 	uint32_t		  flags;
 	uint32_t		  start_sector;		/* physical */
 
+	const char		 *fname;
+	int			  sline;
+
 	struct buf		 *buf;
 	void			 *blob;
 
@@ -141,22 +144,29 @@
 
 /* --------------------------------------------------------------------- */
 
-#define UDF_LOCK_ECCLINE(eccline) udf_lock_eccline(eccline)
-#define UDF_UNLOCK_ECCLINE(eccline) udf_unlock_eccline(eccline)
+#define UDF_LOCK_ECCLINE(eccline) udf_lock_eccline(eccline, __FILE__, __LINE__)
+#define UDF_UNLOCK_ECCLINE(eccline) udf_unlock_eccline(eccline, __FILE__, __LINE__)
 
 /* can be called with or without discstrat lock */
 static void
-udf_lock_eccline(struct udf_eccline *eccline)
+udf_lock_eccline(struct udf_eccline *eccline, const char *fname, int sline)
 {
 	struct strat_private *priv = PRIV(eccline->ump);
 	int waslocked, ret;
 
+	KASSERT(mutex_owned(&priv->discstrat_mutex));
+
 	waslocked = mutex_owned(&priv->discstrat_mutex);
 	if (!waslocked)
 		mutex_enter(&priv->discstrat_mutex);
 
 	/* wait until its unlocked first */
+	eccline->refcnt++;
 	while (eccline->flags & ECC_LOCKED) {
+		DPRINTF(ECCLINE, ("waiting for lock at %s:%d\n",
+					fname, sline));
+		DPRINTF(ECCLINE, ("was locked at %s:%d\n",
+					eccline->fname, eccline->sline));
 		eccline->flags |= ECC_WANTED;
 		ret = cv_timedwait(&priv->discstrat_cv, &priv->discstrat_mutex,
 			hz/8);
@@ -166,6 +176,10 @@
 	}
 	eccline->flags |= ECC_LOCKED;
 	eccline->flags &= ~ECC_WANTED;
+	eccline->refcnt--;
+
+	eccline->fname = fname;
+	eccline->sline = sline;
 
 	if (!waslocked)
 		mutex_exit(&priv->discstrat_mutex);
@@ -174,11 +188,13 @@
 
 /* can be called with or without discstrat lock */
 static void
-udf_unlock_eccline(struct udf_eccline *eccline)
+udf_unlock_eccline(struct udf_eccline *eccline, const char *fname, int sline)
 {
 	struct strat_private *priv = PRIV(eccline->ump);
 	int waslocked;
 
+	KASSERT(mutex_owned(&priv->discstrat_mutex));
+
 	waslocked = mutex_owned(&priv->discstrat_mutex);
 	if (!waslocked)
 		mutex_enter(&priv->discstrat_mutex);
@@ -196,28 +212,21 @@
 udf_dispose_eccline(struct udf_eccline *eccline)
 {
 	struct strat_private *priv = PRIV(eccline->ump);
-	struct buf *ret;
 
 	KASSERT(mutex_owned(&priv->discstrat_mutex));
 
-	KASSERT(eccline->refcnt == 0);
-	KASSERT(eccline->dirty  == 0);
-
 	DPRINTF(ECCLINE, ("dispose eccline with start sector %d, "
 		"present %0"PRIx64"\n", eccline->start_sector,
 		eccline->present));
 
-	if (eccline->queued_on) {
-		ret = bufq_cancel(priv->queues[eccline->queued_on], eccline->buf);
-		KASSERT(ret == eccline->buf);
-		priv->num_queued[eccline->queued_on]--;
-	}
-	LIST_REMOVE(eccline, hashchain);
+	KASSERT(eccline->refcnt == 0);
+	KASSERT(eccline->dirty  == 0);
+	KASSERT(eccline->queued_on == 0);
+	KASSERT(eccline->flags & ECC_FLOATING);
+	KASSERT(eccline->flags & ECC_LOCKED);
 
-	if (eccline->flags & ECC_FLOATING) {
-		eccline->flags &= ~ECC_FLOATING;
-		priv->num_floating--;
-	}
+	LIST_REMOVE(eccline, hashchain);
+	priv->num_floating--;
 
 	putiobuf(eccline->buf);
 	pool_put(&priv->ecclineblob_pool, eccline->blob);
@@ -230,72 +239,69 @@
 udf_push_eccline(struct udf_eccline *eccline, int newqueue)
 {
 	struct strat_private *priv = PRIV(eccline->ump);
-	struct buf *ret;
-	int curqueue;
 
 	KASSERT(mutex_owned(&priv->discstrat_mutex));
 
 	DPRINTF(PARANOIA, ("DEBUG: buf %p pushed on queue %d\n", eccline->buf, newqueue));
 
-	/* requeue */
-	curqueue = eccline->queued_on;
-	if (curqueue) {
-		ret = bufq_cancel(priv->queues[curqueue], eccline->buf);
-
-#ifdef DIAGNOSTIC
-		if (ret == NULL) {
-			int i;
-
-			printf("udf_push_eccline: bufq_cancel can't find "
-				"buffer %p on queue %d; "
-				"dumping queues\n", eccline->buf, curqueue);
-			for (i = 1; i < UDF_SHED_MAX; i++) {
-				printf("queue %d\n\t", i);
-				ret = bufq_get(priv->queues[i]);
-				while (ret) {
-					printf("%p ", ret);
-					if (BTOE(ret)->queued_on != i)
-						printf("WRONGQ ");
-					if (ret == eccline->buf)
-						printf("[<-] ");
-					if (ret == bufq_peek(priv->queues[i])) {
-						printf("LOOP ");
-						break;
-					}
-					ret = bufq_get(priv->queues[i]);
-				}
-				printf("\n");
-			}
-			panic("fatal queue bug; exit");
-		}
-#endif
-
-		KASSERT(ret == eccline->buf);
-		priv->num_queued[curqueue]--;
-	}
+	KASSERT(eccline->queued_on == 0);
+	KASSERT(eccline->flags & ECC_FLOATING);
 
 	/* set buffer block numbers to make sure its queued correctly */
 	eccline->buf->b_lblkno   = eccline->start_sector;
 	eccline->buf->b_blkno    = eccline->start_sector;
 	eccline->buf->b_rawblkno = eccline->start_sector;
 
+	vfs_timestamp(&priv->last_queued[newqueue]);
+	eccline->flags &= ~ECC_FLOATING;
+	priv->num_floating--;
 	eccline->queued_on = newqueue;
 	priv->num_queued[newqueue]++;
 	bufq_put(priv->queues[newqueue], eccline->buf);
-	vfs_timestamp(&priv->last_queued[newqueue]);
 
-	if (eccline->flags & ECC_FLOATING) {
-		eccline->flags &= ~ECC_FLOATING;
-		priv->num_floating--;
-	}
+	UDF_UNLOCK_ECCLINE(eccline);
 
-	/* tickle disc strategy statemachine */
+	/* XXX tickle disc strategy statemachine */
 	if (newqueue != UDF_SHED_IDLE)
 		cv_signal(&priv->discstrat_cv);
 }
 
 
 static struct udf_eccline *
+udf_peek_eccline(struct strat_private *priv, int queued_on)
+{
+	struct udf_eccline *eccline;
+	struct buf *buf;
+
+	KASSERT(mutex_owned(&priv->discstrat_mutex));
+
+	for(;;) {
+		buf = bufq_peek(priv->queues[queued_on]);
+		/* could have been a race, but we'll revisit later */
+		if (buf == NULL)
+			return NULL;
+
+		eccline = BTOE(buf);
+		UDF_LOCK_ECCLINE(eccline);
+
+		/* might have changed before we obtained the lock */
+		if (eccline->queued_on == queued_on)
+			break;
+
+		UDF_UNLOCK_ECCLINE(eccline);
+	}
+
+	KASSERT(eccline->queued_on == queued_on);
+	KASSERT((eccline->flags & ECC_FLOATING) == 0);
+
+	DPRINTF(PARANOIA, ("DEBUG: buf %p peeked at queue %d\n",
+		eccline->buf, queued_on));
+
+	return eccline;
+}
+
+
+static struct udf_eccline *
 udf_pop_eccline(struct strat_private *priv, int queued_on)
 {
 	struct udf_eccline *eccline;
@@ -303,19 +309,29 @@
 
 	KASSERT(mutex_owned(&priv->discstrat_mutex));
 
-	buf = bufq_get(priv->queues[queued_on]);
-	if (buf == NULL) {
-		KASSERT(priv->num_queued[queued_on] == 0);
-		return NULL;
+	for(;;) {
+		buf = bufq_get(priv->queues[queued_on]);
+		if (buf == NULL) {
+			// KASSERT(priv->num_queued[queued_on] == 0);
+			return NULL;
+		}
+
+		eccline = BTOE(buf);
+		UDF_LOCK_ECCLINE(eccline);
+
+		/* might have changed before we obtained the lock */
+		if (eccline->queued_on == queued_on)
+			break;
+
+		UDF_UNLOCK_ECCLINE(eccline);
 	}
 
-	eccline = BTOE(buf);
 	KASSERT(eccline->queued_on == queued_on);
-	eccline->queued_on = 0;
+	KASSERT((eccline->flags & ECC_FLOATING) == 0);
+
 	priv->num_queued[queued_on]--;
+	eccline->queued_on = 0;
 
-	if (eccline->flags & ECC_FLOATING)
-		panic("popping already marked floating eccline");
 	eccline->flags |= ECC_FLOATING;
 	priv->num_floating++;
 
@@ -326,6 +342,28 @@
 }
 
 
+static void
+udf_unqueue_eccline(struct strat_private *priv, struct udf_eccline *eccline)
+{
+	struct buf *ret;
+
+	UDF_LOCK_ECCLINE(eccline);
+	if (eccline->queued_on == 0) {
+		KASSERT(eccline->flags & ECC_FLOATING);
+		return;
+	}
+
+	ret = bufq_cancel(priv->queues[eccline->queued_on], eccline->buf);
+	KASSERT(ret == eccline->buf);
+
+	priv->num_queued[eccline->queued_on]--;
+	eccline->queued_on = 0;
+
+	eccline->flags |= ECC_FLOATING;
+	priv->num_floating++;
+}
+
+
 static struct udf_eccline *
 udf_geteccline(struct udf_mount *ump, uint32_t sector, int flags)
 {
@@ -336,11 +374,13 @@
 	int line, line_offset;
 	int num_busy, ret;
 
+	mutex_enter(&priv->discstrat_mutex);
+
+	/* lookup in our line cache hashtable */
 	line_offset  = sector % ump->packet_size;
 	start_sector = sector - line_offset;
 	line = (start_sector/ump->packet_size) & UDF_ECCBUF_HASHMASK;
 
-	mutex_enter(&priv->discstrat_mutex);
 	KASSERT(priv->thread_running);
 
 retry:
@@ -349,22 +389,15 @@
 		if (eccline->start_sector == start_sector) {
 			DPRINTF(ECCLINE, ("\tfound eccline, start_sector %d\n",
 				eccline->start_sector));
+			udf_unqueue_eccline(priv, eccline);
 
-			UDF_LOCK_ECCLINE(eccline);
-			/* move from freelist (!) */
-			if (eccline->queued_on == UDF_SHED_FREE) {
-				DPRINTF(ECCLINE, ("was on freelist\n"));
-				KASSERT(eccline->refcnt == 0);
-				udf_push_eccline(eccline, UDF_SHED_IDLE);
-			}
-			eccline->refcnt++;
 			mutex_exit(&priv->discstrat_mutex);
 			return eccline;
 		}
 	}
 
-	DPRINTF(ECCLINE, ("\tnot found in eccline cache\n"));
 	/* not found in eccline cache */
+	DPRINTF(ECCLINE, ("\tnot found in eccline cache\n"));
 
 	lb_size  = udf_rw32(ump->logical_vol->lb_size);
 	blobsize = ump->packet_size * lb_size;
@@ -396,23 +429,21 @@
 		}
 
 		/* push back line if we're waiting for it or its locked */
-		if (eccline->flags & (ECC_WANTED | ECC_LOCKED)) {
-			/* XXX what were they doing on the free list anyway? */
-			udf_push_eccline(eccline, UDF_SHED_WAITING);
+		if (eccline->flags & ECC_WANTED) {
+			/* we won a race, but someone else needed it */
+			udf_push_eccline(eccline, UDF_SHED_FREE);
 			goto retry;
 		}
 
 		/* unlink this entry */
 		LIST_REMOVE(eccline, hashchain);
-
 		KASSERT(eccline->flags & ECC_FLOATING);
-	
+		KASSERT(eccline->queued_on == 0);
+
 		eccline_blob = eccline->blob;
-		memset(eccline, 0, sizeof(struct udf_eccline));
-		eccline->flags = ECC_FLOATING;
+		eccline->flags = ECC_FLOATING | ECC_LOCKED;
 	} else {
-		memset(eccline, 0, sizeof(struct udf_eccline));
-		eccline->flags = ECC_FLOATING;
+		eccline->flags = ECC_FLOATING | ECC_LOCKED;
 		priv->num_floating++;
 	}
 
@@ -422,12 +453,14 @@
 	eccline->buf->b_private = eccline;	/* IMPORTANT */
 
 	/* initialise eccline blob */
+	/* XXX memset expensive and strictly not needed XXX */
 	memset(eccline->blob, 0, blobsize);
 
 	eccline->ump = ump;
 	eccline->present = eccline->readin = eccline->dirty = 0;
 	eccline->error = 0;
 	eccline->refcnt = 0;
+	memset(eccline->bufs, 0, UDF_MAX_PACKET_SIZE * sizeof(struct buf *));
 
 	eccline->start_sector    = start_sector;
 	eccline->buf->b_lblkno   = start_sector;
@@ -440,9 +473,10 @@
 	 * TODO possible optimalisation for checking overlap with partitions
 	 * to get a clue on future eccline usage
 	 */
-	eccline->refcnt++;
-	UDF_LOCK_ECCLINE(eccline);
 
+	KASSERT(eccline->refcnt == 0);
+	KASSERT(eccline->flags & ECC_FLOATING);
+	KASSERT(eccline->flags & ECC_LOCKED);
 	mutex_exit(&priv->discstrat_mutex);
 
 	return eccline;
@@ -455,32 +489,45 @@
 	struct strat_private *priv = PRIV(eccline->ump);
 	struct udf_mount *ump = eccline->ump;
 	uint64_t allbits = ((uint64_t) 1 << ump->packet_size)-1;
+	int new_queue;
 
 	mutex_enter(&priv->discstrat_mutex);
 
-	/* clear directly all readin requests from present ones */
-	if (eccline->readin & eccline->present) {
-		/* clear all read bits that are already read in */
-		eccline->readin &= (~eccline->present) & allbits;
-		wakeup(eccline);
-	}
-
 	DPRINTF(ECCLINE, ("put eccline start sector %d, refcnt %d\n",
 		eccline->start_sector, eccline->refcnt));
 
+	KASSERT(eccline->flags & ECC_LOCKED);
+	KASSERT(eccline->flags & ECC_FLOATING);
+
+	/* clear all read bits that are already read in */
+	if (eccline->readin & eccline->present)
+		eccline->readin &= (~eccline->present) & allbits;
+
 	/* if we have active nodes we dont set it on seqwriting */
 	if (eccline->refcnt > 1)
 		eccline->flags &= ~ECC_SEQWRITING;
 
-	vfs_timestamp(&eccline->wait_time);
-	eccline->wait_time.tv_sec += ECC_WAITTIME;
-	udf_push_eccline(eccline, UDF_SHED_WAITING);
-
-	KASSERT(eccline->refcnt >= 1);
-	eccline->refcnt--;
-	UDF_UNLOCK_ECCLINE(eccline);
+	/* select state */
+	new_queue = UDF_SHED_FREE;
+	if (eccline->refcnt > 0)
+		new_queue = UDF_SHED_IDLE;
+	if (eccline->flags & ECC_WANTED)
+		new_queue = UDF_SHED_IDLE;
+	if (eccline->readin)
+		new_queue = UDF_SHED_READING;
+	if (eccline->dirty) {
+		new_queue = UDF_SHED_WAITING;
+		vfs_timestamp(&eccline->wait_time);
+		eccline->wait_time.tv_sec += ECC_WAITTIME;
+
+		if (eccline->present == allbits) {
+			new_queue = UDF_SHED_WRITING;
+			if (eccline->flags & ECC_SEQWRITING)
+				new_queue = UDF_SHED_SEQWRITING;
+		}
+	}
+	udf_push_eccline(eccline, new_queue);
 
-	wakeup(eccline);
 	mutex_exit(&priv->discstrat_mutex);
 }
 
@@ -545,7 +592,10 @@
 	eccsect = sectornr - eccline->start_sector;
 
 	bit = (uint64_t) 1 << eccsect;
+	KASSERT(eccline->present & bit);
+
 	eccline->readin &= ~bit;	/* just in case */
+	/* XXX eccline->dirty? */
 
 	KASSERT(eccline->refcnt >= 1);
 	eccline->refcnt--;
@@ -560,6 +610,7 @@
 	union dscrptr   **dscrptr = &args->dscr;
 	struct udf_mount *ump = args->ump;
 	struct long_ad   *icb = args->icb;
+	struct strat_private *priv;
 	struct udf_eccline *eccline;
 	uint64_t bit;
 	uint32_t sectornr, dummy;
@@ -586,22 +637,28 @@
 		KASSERT(eccline->bufs[eccsect] == NULL);
 		udf_puteccline(eccline);
 
-		/* wait for completion; XXX remodel to lock bit code */
-		error = 0;
-		while ((eccline->present & bit) == 0) {
-			tsleep(eccline, PRIBIO+1, "udflvdrd", hz/8);
-			if (eccline->error & bit) {
-				KASSERT(eccline->refcnt >= 1);
-				eccline->refcnt--;	/* undo temp refcnt */
-				*dscrptr = NULL;
-				return EIO;		/* XXX error code */
-			}
+		/* wait for completion */
+		priv = PRIV(eccline->ump);
+		mutex_enter(&priv->discstrat_mutex);
+		while (((eccline->present | eccline->error) & bit) == 0) {
+			error = cv_timedwait(&priv->discstrat_cv,
+				&priv->discstrat_mutex,
+				hz/8);
+			if (error == EWOULDBLOCK)
+				DPRINTF(LOCKING, ("eccline waiting for read\n"));
 		}
+		mutex_exit(&priv->discstrat_mutex);
 
 		/* reget our line */
 		eccline = udf_geteccline(ump, sectornr, 0);
 		KASSERT(eccline->refcnt >= 1);
 		eccline->refcnt--;	/* undo refcnt */
+
+		if (eccline->error & bit) {
+			*dscrptr = NULL;
+			udf_puteccline(eccline);
+			return EIO;		/* XXX error code */
+		}
 	}
 
 	*dscrptr = (union dscrptr *)
@@ -634,6 +691,7 @@
 		return error;
 	}
 
+	/* we have a hold since it has a node descriptor */
 	eccline->refcnt++;
 	udf_puteccline(eccline);
 
@@ -663,7 +721,7 @@
 	if (error)
 		return error;
 
-	/* add reference to the vnode to prevent recycling */
+	/* paranoia: add reference to the vnode to prevent recycling */
 	vhold(udf_node->vnode);
 
 	/* get our eccline */
@@ -699,14 +757,14 @@
 
 	KASSERT(udf_tagsize(dscrptr, sector_size) <= sector_size);
 
-	udf_puteccline(eccline);
-
-	holdrele(udf_node->vnode);
 	udf_node->outstanding_nodedscr--;
 	if (udf_node->outstanding_nodedscr == 0) {
+		/* XXX still using wakeup! */
 		UDF_UNLOCK_NODE(udf_node, 0);
 		wakeup(&udf_node->outstanding_nodedscr);
 	}
+	holdrele(udf_node->vnode);
+	udf_puteccline(eccline);
 
 	/* XXX waitfor not used */
 	return 0;
@@ -765,7 +823,7 @@
 		bpos = 0;
 		while (buf_len) {
 			len = MIN(buf_len, sector_size);
-			if (eccsect == ump->packet_size) {
+			if ((eccsect < 0) || (eccsect >= ump->packet_size)) {
 				udf_puteccline(eccline);
 				eccline = udf_geteccline(ump, sectornr, 0);
 				eccsect = sectornr - eccline->start_sector;
@@ -800,24 +858,11 @@
 			"type %d, b_resid %d, b_bcount %d, b_bufsize %d\n",
 			buf, (uint32_t) buf->b_blkno / blks, buf->b_udf_c_type,
 			buf->b_resid, buf->b_bcount, buf->b_bufsize));
+
 		/* if we have FIDs fixup using buffer's sector number(s) */
-		if (buf->b_udf_c_type == UDF_C_FIDS) {
+		if (buf->b_udf_c_type == UDF_C_FIDS)
 			panic("UDF_C_FIDS in SHED_WRITING!\n");
-#if 0
-			buf_len = buf->b_bcount;
-			sectornr = our_sectornr;
-			bpos = 0;
-			while (buf_len) {
-				len = MIN(buf_len, sector_size);
-				fidblk = (uint8_t *) buf->b_data + bpos;
-				udf_fixup_fid_block(fidblk, sector_size,
-					0, len, sectornr);
-				sectornr++;
-				bpos += len;
-				buf_len -= len;
-			}
-#endif
-		}
+
 		udf_fixup_node_internals(ump, buf->b_data, buf->b_udf_c_type);
 
 		/* copy parts into the bufs and set for writing */
@@ -828,7 +873,7 @@
 		bpos = 0;
 		while (buf_len) {
 			len = MIN(buf_len, sector_size);
-			if (eccsect == ump->packet_size) {
+			if ((eccsect < 0) || (eccsect >= ump->packet_size)) {
 				udf_puteccline(eccline);
 				eccline = udf_geteccline(ump, sectornr, 0);
 				eccsect = sectornr - eccline->start_sector;
@@ -976,8 +1021,10 @@
 	int sector_size = ump->discinfo.sector_size;
 	int error, i, len;
 
-	DPRINTF(ECCLINE, ("read callback called\n"));
+	DPRINTF(ECCLINE, ("read callback called on buf %p\n", buf));
+
 	/* post process read action */
+	KASSERT(eccline->flags & ECC_LOCKED);
 	error = buf->b_error;
 	for (i = 0; i < ump->packet_size; i++) {
 		bit = (uint64_t) 1 << i;
@@ -1008,7 +1055,6 @@
 	 */
 
 	udf_puteccline(eccline);
-
 	DPRINTF(ECCLINE, ("read callback finished\n"));
 }
 
@@ -1019,10 +1065,12 @@
 	struct udf_eccline *eccline = BTOE(buf);
 	struct udf_mount *ump = eccline->ump;
 	uint64_t bit;
-	int error, i, len;
+	int error, i;
+
+	DPRINTF(ECCLINE, ("write callback called on buf %p\n", buf));
 
-	DPRINTF(ECCLINE, ("write callback called buf %p\n", buf));
 	/* post process write action */
+	KASSERT(eccline->flags & ECC_LOCKED);
 	error = buf->b_error;
 	for (i = 0; i < ump->packet_size; i++) {
 		bit = (uint64_t) 1 << i;
@@ -1033,20 +1081,18 @@
 		} else {
 			eccline->dirty &= ~bit;
 		}
-		if (eccline->bufs[i]) {
-			len = eccline->bufs_len[i];
-			nestiobuf_done(eccline->bufs[i], len, error);
-			eccline->bufs[i] = NULL;
-		}
+
+		KASSERT(eccline->bufs[i] == 0);
 	}
 	KASSERT(eccline->dirty == 0);
-
 	KASSERT(error == 0);
+
 	/*
 	 * XXX TODO on write errors allocate a sparable entry and reissue
 	 */
 
 	udf_puteccline(eccline);
+	DPRINTF(ECCLINE, ("write callback finished\n"));
 }
 
 
@@ -1062,6 +1108,8 @@
 	int blks = sector_size / DEV_BSIZE;
 	int i;
 
+	KASSERT(eccline->flags & ECC_LOCKED);
+
 	if (queued_on == UDF_SHED_READING) {
 		DPRINTF(SHEDULE, ("udf_issue_eccline reading : "));
 		/* read all bits that are not yet present */
@@ -1096,10 +1144,13 @@
 				nestbuf->b_blkno = buf->b_blkno + i*blks;
 				nestbuf->b_rawblkno = buf->b_rawblkno + i*blks;
 
-				DPRINTF(SHEDULE, ("sector %d ",
-					start + i));
-				/* call asynchronous */
-				VOP_STRATEGY(ump->devvp, nestbuf);
+				DPRINTF(SHEDULE, ("sector %d ", start + i));
+
+				/* mutex dance since it could lock */
+				mutex_exit(&priv->discstrat_mutex);
+					/* call asynchronous */
+					VOP_STRATEGY(ump->devvp, nestbuf);
+				mutex_enter(&priv->discstrat_mutex);
 			}
 			DPRINTF(SHEDULE, ("\n"));
 			return;
@@ -1110,13 +1161,8 @@
 		DPRINTF(SHEDULE, ("\n\tpresent %"PRIx64", readin %"PRIx64", "
 			"dirty %"PRIx64"\n\t", eccline->present, eccline->readin,
 			eccline->dirty));
-		if (eccline->present != allbits) {
-			/* requeue to read-only */
-			DPRINTF(SHEDULE, ("\n\t-> not complete, requeue to "
-				"reading\n"));
-			udf_push_eccline(eccline, UDF_SHED_READING);
-			return;
-		}
+		KASSERT(eccline->present == allbits);
+
 		start = eccline->start_sector;
 		buf = eccline->buf;
 		buf->b_flags    = B_WRITE | B_ASYNC;
@@ -1133,6 +1179,7 @@
 		buf->b_proc     = NULL;
 	}
 
+	/* mutex dance since it could lock */
 	mutex_exit(&priv->discstrat_mutex);
 		/* call asynchronous */
 		DPRINTF(SHEDULE, ("sector %d for %d\n",
@@ -1150,7 +1197,7 @@
 	struct udf_eccline *eccline;
 	struct timespec now, *last;
 	uint64_t allbits = ((uint64_t) 1 << ump->packet_size)-1;
-	int new_queue, wait, work, num, cnt;
+	int new_queue, wait, work;
 
 	work = 1;
 	priv->thread_running = 1;
@@ -1161,12 +1208,26 @@
 		vfs_timestamp(&now);
 
 		/* maintenance: handle eccline state machine */
-		num = priv->num_queued[UDF_SHED_WAITING];
-		cnt = 0;
-		while (cnt < num) {
+		for(;;) {
+			/* only peek at it */
+			eccline = udf_peek_eccline(priv, UDF_SHED_WAITING);
+			if (eccline == NULL)
+				break;
+
+			/* all others are later, so if not satisfied, abort */
+			if ((eccline->wait_time.tv_sec - now.tv_sec > 0)) {
+				UDF_UNLOCK_ECCLINE(eccline);
+				break;
+			}
+
+			/* release */
+			UDF_UNLOCK_ECCLINE(eccline);
+
+			/* do get it */
 			eccline = udf_pop_eccline(priv, UDF_SHED_WAITING);
-			/* requeue */
-			new_queue = UDF_SHED_FREE;
+
+			/* requeue according to state */
+			new_queue = UDF_SHED_FREE;	/* unlikely */
 			if (eccline->refcnt > 0)
 				new_queue = UDF_SHED_IDLE;
 			if (eccline->flags & ECC_WANTED)
@@ -1174,22 +1235,14 @@
 			if (eccline->readin)
 				new_queue = UDF_SHED_READING;
 			if (eccline->dirty) {
-				new_queue = UDF_SHED_WAITING;
-				if ((eccline->wait_time.tv_sec - now.tv_sec <= 0) ||
-				   ((eccline->present == allbits) &&
-				    (eccline->flags & ECC_SEQWRITING))) 
-				{
+				new_queue = UDF_SHED_READING;
+				if (eccline->present == allbits) {
 					new_queue = UDF_SHED_WRITING;
 					if (eccline->flags & ECC_SEQWRITING)
 						new_queue = UDF_SHED_SEQWRITING;
-					if (eccline->present != allbits)
-						new_queue = UDF_SHED_READING;
 				}
 			}
-			if (eccline->flags & ECC_LOCKED)
-				new_queue = UDF_SHED_WAITING;
 			udf_push_eccline(eccline, new_queue);
-			cnt++;
 		}
 
 		/* maintenance: free excess ecclines */
@@ -1197,9 +1250,10 @@
 			eccline = udf_pop_eccline(priv, UDF_SHED_FREE);
 			KASSERT(eccline);
 			KASSERT(eccline->refcnt == 0);
-			if (eccline->flags & (ECC_WANTED | ECC_LOCKED)) {
-				udf_push_eccline(eccline, UDF_SHED_IDLE);
+			if (eccline->flags & ECC_WANTED) {
+				/* we won the race, but we dont want to win */
 				DPRINTF(ECCLINE, ("Tried removing, pushed back to free list\n"));
+				udf_push_eccline(eccline, UDF_SHED_IDLE);
 			} else {
 				DPRINTF(ECCLINE, ("Removing entry from free list\n"));
 				udf_dispose_eccline(eccline);
@@ -1218,10 +1272,6 @@
 			new_queue = priv->cur_queue;
 			DPRINTF(ECCLINE, ("UDF_ISSUE_ECCLINE\n"));
 
-			/* complete the `get' by locking and refcounting it */
-			UDF_LOCK_ECCLINE(eccline);
-			eccline->refcnt++;
-
 			udf_issue_eccline(eccline, priv->cur_queue);
 		} else {
 			/* don't switch too quickly */
@@ -1278,17 +1328,18 @@
 
 	/* tear down remaining ecclines */
 	mutex_enter(&priv->discstrat_mutex);
+	KASSERT(bufq_peek(priv->queues[UDF_SHED_WAITING]) == NULL);
+	KASSERT(bufq_peek(priv->queues[UDF_SHED_IDLE]) == NULL);
+	KASSERT(bufq_peek(priv->queues[UDF_SHED_READING]) == NULL);
+	KASSERT(bufq_peek(priv->queues[UDF_SHED_WRITING]) == NULL);
+	KASSERT(bufq_peek(priv->queues[UDF_SHED_SEQWRITING]) == NULL);
+
 	KASSERT(priv->num_queued[UDF_SHED_WAITING] == 0);
 	KASSERT(priv->num_queued[UDF_SHED_IDLE] == 0);
 	KASSERT(priv->num_queued[UDF_SHED_READING] == 0);
 	KASSERT(priv->num_queued[UDF_SHED_WRITING] == 0);
 	KASSERT(priv->num_queued[UDF_SHED_SEQWRITING] == 0);
 
-	KASSERT(bufq_peek(priv->queues[UDF_SHED_WAITING]) == NULL);
-	KASSERT(bufq_peek(priv->queues[UDF_SHED_IDLE]) == NULL);
-	KASSERT(bufq_peek(priv->queues[UDF_SHED_READING]) == NULL);
-	KASSERT(bufq_peek(priv->queues[UDF_SHED_WRITING]) == NULL);
-	KASSERT(bufq_peek(priv->queues[UDF_SHED_SEQWRITING]) == NULL);
 	eccline = udf_pop_eccline(priv, UDF_SHED_FREE);
 	while (eccline) {
 		udf_dispose_eccline(eccline);

Reply via email to