Module Name:    src
Committed By:   hannken
Date:           Mon Feb 21 09:29:21 UTC 2011

Modified Files:
        src/sys/ufs/ffs: ffs_snapshot.c

Log Message:
Change the snapshot lock:
- No need to take the snapshot lock while the file system is suspended.
- Allow ffs_copyonwrite() one level of recursion with snapshots locked.
- Do the block address lookup with snapshots locked.
- Take the snapshot lock while removing a snapshot from the list.

While hunting deadlocks change the transaction scope for ffs_snapremove().
We could deadlock from UFS_WAPBL_BEGIN() with a buffer held.


To generate a diff of this commit:
cvs rdiff -u -r1.105 -r1.106 src/sys/ufs/ffs/ffs_snapshot.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/ufs/ffs/ffs_snapshot.c
diff -u src/sys/ufs/ffs/ffs_snapshot.c:1.105 src/sys/ufs/ffs/ffs_snapshot.c:1.106
--- src/sys/ufs/ffs/ffs_snapshot.c:1.105	Fri Feb 18 14:48:54 2011
+++ src/sys/ufs/ffs/ffs_snapshot.c	Mon Feb 21 09:29:21 2011
@@ -1,4 +1,4 @@
-/*	$NetBSD: ffs_snapshot.c,v 1.105 2011/02/18 14:48:54 bouyer Exp $	*/
+/*	$NetBSD: ffs_snapshot.c,v 1.106 2011/02/21 09:29:21 hannken Exp $	*/
 
 /*
  * Copyright 2000 Marshall Kirk McKusick. All Rights Reserved.
@@ -38,7 +38,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ffs_snapshot.c,v 1.105 2011/02/18 14:48:54 bouyer Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ffs_snapshot.c,v 1.106 2011/02/21 09:29:21 hannken Exp $");
 
 #if defined(_KERNEL_OPT)
 #include "opt_ffs.h"
@@ -79,6 +79,7 @@
 struct snap_info {
 	kmutex_t si_lock;			/* Lock this snapinfo */
 	kmutex_t si_snaplock;			/* Snapshot vnode common lock */
+	lwp_t *si_owner;			/* Sanplock owner */
 	TAILQ_HEAD(inodelst, inode) si_snapshots; /* List of active snapshots */
 	daddr_t *si_snapblklist;		/* Snapshot block hints list */
 	uint32_t si_gen;			/* Incremented on change */
@@ -140,6 +141,7 @@
 	TAILQ_INIT(&si->si_snapshots);
 	mutex_init(&si->si_lock, MUTEX_DEFAULT, IPL_NONE);
 	mutex_init(&si->si_snaplock, MUTEX_DEFAULT, IPL_NONE);
+	si->si_owner = NULL;
 	si->si_gen = 0;
 	si->si_snapblklist = NULL;
 
@@ -173,7 +175,6 @@
 }
 #else /* defined(FFS_NO_SNAPSHOT) */
 	bool suspended = false;
-	bool snapshot_locked = false;
 	int error, redo = 0, snaploc;
 	void *sbbuf = NULL;
 	daddr_t *snaplist = NULL, snaplistsize = 0;
@@ -269,11 +270,6 @@
 	if (error)
 		goto out;
 	/*
-	 * Acquire the snapshot lock.
-	 */
-	mutex_enter(&si->si_snaplock);
-	snapshot_locked = true;
-	/*
 	 * Record snapshot inode. Since this is the newest snapshot,
 	 * it must be placed at the end of the list.
 	 */
@@ -376,8 +372,6 @@
 	si->si_gen++;
 	mutex_exit(&si->si_lock);
 
-	if (snapshot_locked)
-		mutex_exit(&si->si_snaplock);
 	if (suspended) {
 		vfs_resume(vp->v_mount);
 #ifdef DEBUG
@@ -1354,8 +1348,7 @@
 	struct snap_info *si;
 	struct lwp *l = curlwp;
 	daddr_t numblks, blkno, dblk;
-	int error, loc, last, n;
-	const int wbreak = blocks_in_journal(fs)/8;
+	int error, loc, last;
 
 	si = VFSTOUFS(mp)->um_snapinfo;
 	/*
@@ -1365,6 +1358,7 @@
 	 *
 	 * Clear copy-on-write flag if last snapshot.
 	 */
+	mutex_enter(&si->si_snaplock);
 	mutex_enter(&si->si_lock);
 	if (is_active_snapshot(si, ip)) {
 		TAILQ_REMOVE(&si->si_snapshots, ip, i_nextsnap);
@@ -1374,18 +1368,22 @@
 			si->si_snapblklist = xp->i_snapblklist;
 			si->si_gen++;
 			mutex_exit(&si->si_lock);
+			mutex_exit(&si->si_snaplock);
 		} else {
 			si->si_snapblklist = 0;
 			si->si_gen++;
 			mutex_exit(&si->si_lock);
+			mutex_exit(&si->si_snaplock);
 			fscow_disestablish(mp, ffs_copyonwrite, devvp);
 		}
 		if (ip->i_snapblklist != NULL) {
 			free(ip->i_snapblklist, M_UFSMNT);
 			ip->i_snapblklist = NULL;
 		}
-	} else
+	} else {
 		mutex_exit(&si->si_lock);
+		mutex_exit(&si->si_snaplock);
+	}
 	/*
 	 * Clear all BLK_NOCOPY fields. Pass any block claims to other
 	 * snapshots that want them (see ffs_snapblkfree below).
@@ -1402,7 +1400,7 @@
 		}
 	}
 	numblks = howmany(ip->i_size, fs->fs_bsize);
-	for (blkno = NDADDR, n = 0; blkno < numblks; blkno += NINDIR(fs)) {
+	for (blkno = NDADDR; blkno < numblks; blkno += NINDIR(fs)) {
 		error = ffs_balloc(vp, lblktosize(fs, (off_t)blkno),
 		    fs->fs_bsize, l->l_cred, B_METAONLY, &ibp);
 		if (error)
@@ -1412,12 +1410,6 @@
 		else
 			last = fs->fs_size - blkno;
 		for (loc = 0; loc < last; loc++) {
-			if (wbreak > 0 && (++n % wbreak) == 0) {
-				UFS_WAPBL_END(mp);
-				error = UFS_WAPBL_BEGIN(mp);
-				if (error)
-					panic("UFS_WAPBL_BEGIN failed");
-			}
 			dblk = idb_get(ip, ibp->b_data, loc);
 			if (dblk == BLK_NOCOPY || dblk == BLK_SNAP)
 				idb_assign(ip, ibp->b_data, loc, 0);
@@ -1429,6 +1421,9 @@
 			}
 		}
 		bawrite(ibp);
+		UFS_WAPBL_END(mp);
+		error = UFS_WAPBL_BEGIN(mp);
+		KASSERT(error == 0);
 	}
 	/*
 	 * Clear snapshot flag and drop reference.
@@ -1469,25 +1464,18 @@
 	daddr_t lbn;
 	daddr_t blkno;
 	uint32_t gen;
-	int indiroff = 0, snapshot_locked = 0, error = 0, claimedblk = 0;
+	int indiroff = 0, error = 0, claimedblk = 0;
 
 	si = VFSTOUFS(mp)->um_snapinfo;
 	lbn = fragstoblks(fs, bno);
+	mutex_enter(&si->si_snaplock);
 	mutex_enter(&si->si_lock);
+	si->si_owner = curlwp;
+		
 retry:
 	gen = si->si_gen;
 	TAILQ_FOREACH(ip, &si->si_snapshots, i_nextsnap) {
 		vp = ITOV(ip);
-		if (snapshot_locked == 0) {
-			if (!mutex_tryenter(&si->si_snaplock)) {
-				mutex_exit(&si->si_lock);
-				mutex_enter(&si->si_snaplock);
-				mutex_enter(&si->si_lock);
-			}
-			snapshot_locked = 1;
-			if (gen != si->si_gen)
-				goto retry;
-		}
 		/*
 		 * Lookup block being written.
 		 */
@@ -1584,6 +1572,9 @@
 				error = syncsnap(vp);
 			else
 				error = 0;
+			mutex_enter(&si->si_lock);
+			si->si_owner = NULL;
+			mutex_exit(&si->si_lock);
 			mutex_exit(&si->si_snaplock);
 			return (error == 0);
 		}
@@ -1623,7 +1614,9 @@
 		if (gen != si->si_gen)
 			goto retry;
 	}
+	si->si_owner = NULL;
 	mutex_exit(&si->si_lock);
+	mutex_exit(&si->si_snaplock);
 	if (saved_data)
 		free(saved_data, M_UFSMNT);
 	/*
@@ -1632,8 +1625,6 @@
 	 * not be freed. Although space will be lost, the snapshot
 	 * will stay consistent.
 	 */
-	if (snapshot_locked)
-		mutex_exit(&si->si_snaplock);
 	return (error);
 }
 
@@ -1846,6 +1837,15 @@
 	/*
 	 * Not in the precomputed list, so check the snapshots.
 	 */
+	 if (si->si_owner != curlwp) {
+		if (!mutex_tryenter(&si->si_snaplock)) {
+			mutex_exit(&si->si_lock);
+			mutex_enter(&si->si_snaplock);
+			mutex_enter(&si->si_lock);
+		}
+		si->si_owner = curlwp;
+		snapshot_locked = 1;
+	 }
 	 if (data_valid && bp->b_bcount == fs->fs_bsize)
 		saved_data = bp->b_data;
 retry:
@@ -1886,34 +1886,8 @@
 			error = ENOMEM;
 			break;
 		}
-
-		if (snapshot_locked == 0) {
-			if (!mutex_tryenter(&si->si_snaplock)) {
-				mutex_exit(&si->si_lock);
-				mutex_enter(&si->si_snaplock);
-				mutex_enter(&si->si_lock);
-			}
-			snapshot_locked = 1;
-			if (gen != si->si_gen)
-				goto retry;
-
-			/* Check again if block still needs to be copied */
-			if (lbn < NDADDR) {
-				blkno = db_get(ip, lbn);
-			} else {
-				mutex_exit(&si->si_lock);
-				if ((error = snapblkaddr(vp, lbn, &blkno)) != 0) {
-					mutex_enter(&si->si_lock);
-					break;
-				}
-				mutex_enter(&si->si_lock);
-				if (gen != si->si_gen)
-					goto retry;
-			}
-
-			if (blkno != 0)
-				continue;
-		}
+		/* Only one level of recursion allowed. */
+		KASSERT(snapshot_locked);
 		/*
 		 * Allocate the block into which to do the copy. Since
 		 * multiple processes may all try to copy the same block,
@@ -1968,11 +1942,14 @@
 	 * have not been unlinked, and hence will be visible after
 	 * a crash, to ensure their integrity.
 	 */
-	mutex_exit(&si->si_lock);
+	if (snapshot_locked) {
+		si->si_owner = NULL;
+		mutex_exit(&si->si_lock);
+		mutex_exit(&si->si_snaplock);
+	} else
+		mutex_exit(&si->si_lock);
 	if (saved_data && saved_data != bp->b_data)
 		free(saved_data, M_UFSMNT);
-	if (snapshot_locked)
-		mutex_exit(&si->si_snaplock);
 	return error;
 }
 

Reply via email to