Module Name: src Committed By: hannken Date: Mon Feb 21 09:29:21 UTC 2011
Modified Files: src/sys/ufs/ffs: ffs_snapshot.c Log Message: Change the snapshot lock: - No need to take the snapshot lock while the file system is suspended. - Allow ffs_copyonwrite() one level of recursion with snapshots locked. - Do the block address lookup with snapshots locked. - Take the snapshot lock while removing a snapshot from the list. While hunting deadlocks change the transaction scope for ffs_snapremove(). We could deadlock from UFS_WAPBL_BEGIN() with a buffer held. To generate a diff of this commit: cvs rdiff -u -r1.105 -r1.106 src/sys/ufs/ffs/ffs_snapshot.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/ufs/ffs/ffs_snapshot.c diff -u src/sys/ufs/ffs/ffs_snapshot.c:1.105 src/sys/ufs/ffs/ffs_snapshot.c:1.106 --- src/sys/ufs/ffs/ffs_snapshot.c:1.105 Fri Feb 18 14:48:54 2011 +++ src/sys/ufs/ffs/ffs_snapshot.c Mon Feb 21 09:29:21 2011 @@ -1,4 +1,4 @@ -/* $NetBSD: ffs_snapshot.c,v 1.105 2011/02/18 14:48:54 bouyer Exp $ */ +/* $NetBSD: ffs_snapshot.c,v 1.106 2011/02/21 09:29:21 hannken Exp $ */ /* * Copyright 2000 Marshall Kirk McKusick. All Rights Reserved. @@ -38,7 +38,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: ffs_snapshot.c,v 1.105 2011/02/18 14:48:54 bouyer Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ffs_snapshot.c,v 1.106 2011/02/21 09:29:21 hannken Exp $"); #if defined(_KERNEL_OPT) #include "opt_ffs.h" @@ -79,6 +79,7 @@ struct snap_info { kmutex_t si_lock; /* Lock this snapinfo */ kmutex_t si_snaplock; /* Snapshot vnode common lock */ + lwp_t *si_owner; /* Snaplock owner */ TAILQ_HEAD(inodelst, inode) si_snapshots; /* List of active snapshots */ daddr_t *si_snapblklist; /* Snapshot block hints list */ uint32_t si_gen; /* Incremented on change */ @@ -140,6 +141,7 @@ TAILQ_INIT(&si->si_snapshots); mutex_init(&si->si_lock, MUTEX_DEFAULT, IPL_NONE); mutex_init(&si->si_snaplock, MUTEX_DEFAULT, IPL_NONE); + si->si_owner = NULL; si->si_gen = 0; si->si_snapblklist = NULL; @@ -173,7 +175,6 @@ } #else /* defined(FFS_NO_SNAPSHOT) */ bool suspended = false; - bool snapshot_locked = false; int error, redo = 0, snaploc; void *sbbuf = NULL; daddr_t *snaplist = NULL, snaplistsize = 0; @@ -269,11 +270,6 @@ if (error) goto out; /* - * Acquire the snapshot lock. - */ - mutex_enter(&si->si_snaplock); - snapshot_locked = true; - /* * Record snapshot inode. Since this is the newest snapshot, * it must be placed at the end of the list. 
*/ @@ -376,8 +372,6 @@ si->si_gen++; mutex_exit(&si->si_lock); - if (snapshot_locked) - mutex_exit(&si->si_snaplock); if (suspended) { vfs_resume(vp->v_mount); #ifdef DEBUG @@ -1354,8 +1348,7 @@ struct snap_info *si; struct lwp *l = curlwp; daddr_t numblks, blkno, dblk; - int error, loc, last, n; - const int wbreak = blocks_in_journal(fs)/8; + int error, loc, last; si = VFSTOUFS(mp)->um_snapinfo; /* @@ -1365,6 +1358,7 @@ * * Clear copy-on-write flag if last snapshot. */ + mutex_enter(&si->si_snaplock); mutex_enter(&si->si_lock); if (is_active_snapshot(si, ip)) { TAILQ_REMOVE(&si->si_snapshots, ip, i_nextsnap); @@ -1374,18 +1368,22 @@ si->si_snapblklist = xp->i_snapblklist; si->si_gen++; mutex_exit(&si->si_lock); + mutex_exit(&si->si_snaplock); } else { si->si_snapblklist = 0; si->si_gen++; mutex_exit(&si->si_lock); + mutex_exit(&si->si_snaplock); fscow_disestablish(mp, ffs_copyonwrite, devvp); } if (ip->i_snapblklist != NULL) { free(ip->i_snapblklist, M_UFSMNT); ip->i_snapblklist = NULL; } - } else + } else { mutex_exit(&si->si_lock); + mutex_exit(&si->si_snaplock); + } /* * Clear all BLK_NOCOPY fields. Pass any block claims to other * snapshots that want them (see ffs_snapblkfree below). 
@@ -1402,7 +1400,7 @@ } } numblks = howmany(ip->i_size, fs->fs_bsize); - for (blkno = NDADDR, n = 0; blkno < numblks; blkno += NINDIR(fs)) { + for (blkno = NDADDR; blkno < numblks; blkno += NINDIR(fs)) { error = ffs_balloc(vp, lblktosize(fs, (off_t)blkno), fs->fs_bsize, l->l_cred, B_METAONLY, &ibp); if (error) @@ -1412,12 +1410,6 @@ else last = fs->fs_size - blkno; for (loc = 0; loc < last; loc++) { - if (wbreak > 0 && (++n % wbreak) == 0) { - UFS_WAPBL_END(mp); - error = UFS_WAPBL_BEGIN(mp); - if (error) - panic("UFS_WAPBL_BEGIN failed"); - } dblk = idb_get(ip, ibp->b_data, loc); if (dblk == BLK_NOCOPY || dblk == BLK_SNAP) idb_assign(ip, ibp->b_data, loc, 0); @@ -1429,6 +1421,9 @@ } } bawrite(ibp); + UFS_WAPBL_END(mp); + error = UFS_WAPBL_BEGIN(mp); + KASSERT(error == 0); } /* * Clear snapshot flag and drop reference. @@ -1469,25 +1464,18 @@ daddr_t lbn; daddr_t blkno; uint32_t gen; - int indiroff = 0, snapshot_locked = 0, error = 0, claimedblk = 0; + int indiroff = 0, error = 0, claimedblk = 0; si = VFSTOUFS(mp)->um_snapinfo; lbn = fragstoblks(fs, bno); + mutex_enter(&si->si_snaplock); mutex_enter(&si->si_lock); + si->si_owner = curlwp; + retry: gen = si->si_gen; TAILQ_FOREACH(ip, &si->si_snapshots, i_nextsnap) { vp = ITOV(ip); - if (snapshot_locked == 0) { - if (!mutex_tryenter(&si->si_snaplock)) { - mutex_exit(&si->si_lock); - mutex_enter(&si->si_snaplock); - mutex_enter(&si->si_lock); - } - snapshot_locked = 1; - if (gen != si->si_gen) - goto retry; - } /* * Lookup block being written. */ @@ -1584,6 +1572,9 @@ error = syncsnap(vp); else error = 0; + mutex_enter(&si->si_lock); + si->si_owner = NULL; + mutex_exit(&si->si_lock); mutex_exit(&si->si_snaplock); return (error == 0); } @@ -1623,7 +1614,9 @@ if (gen != si->si_gen) goto retry; } + si->si_owner = NULL; mutex_exit(&si->si_lock); + mutex_exit(&si->si_snaplock); if (saved_data) free(saved_data, M_UFSMNT); /* @@ -1632,8 +1625,6 @@ * not be freed. 
Although space will be lost, the snapshot * will stay consistent. */ - if (snapshot_locked) - mutex_exit(&si->si_snaplock); return (error); } @@ -1846,6 +1837,15 @@ /* * Not in the precomputed list, so check the snapshots. */ + if (si->si_owner != curlwp) { + if (!mutex_tryenter(&si->si_snaplock)) { + mutex_exit(&si->si_lock); + mutex_enter(&si->si_snaplock); + mutex_enter(&si->si_lock); + } + si->si_owner = curlwp; + snapshot_locked = 1; + } if (data_valid && bp->b_bcount == fs->fs_bsize) saved_data = bp->b_data; retry: @@ -1886,34 +1886,8 @@ error = ENOMEM; break; } - - if (snapshot_locked == 0) { - if (!mutex_tryenter(&si->si_snaplock)) { - mutex_exit(&si->si_lock); - mutex_enter(&si->si_snaplock); - mutex_enter(&si->si_lock); - } - snapshot_locked = 1; - if (gen != si->si_gen) - goto retry; - - /* Check again if block still needs to be copied */ - if (lbn < NDADDR) { - blkno = db_get(ip, lbn); - } else { - mutex_exit(&si->si_lock); - if ((error = snapblkaddr(vp, lbn, &blkno)) != 0) { - mutex_enter(&si->si_lock); - break; - } - mutex_enter(&si->si_lock); - if (gen != si->si_gen) - goto retry; - } - - if (blkno != 0) - continue; - } + /* Only one level of recursion allowed. */ + KASSERT(snapshot_locked); /* * Allocate the block into which to do the copy. Since * multiple processes may all try to copy the same block, @@ -1968,11 +1942,14 @@ * have not been unlinked, and hence will be visible after * a crash, to ensure their integrity. */ - mutex_exit(&si->si_lock); + if (snapshot_locked) { + si->si_owner = NULL; + mutex_exit(&si->si_lock); + mutex_exit(&si->si_snaplock); + } else + mutex_exit(&si->si_lock); if (saved_data && saved_data != bp->b_data) free(saved_data, M_UFSMNT); - if (snapshot_locked) - mutex_exit(&si->si_snaplock); return error; }