Module Name:    src
Committed By:   bouyer
Date:           Sat Jun 18 17:00:26 UTC 2011

Modified Files:
        src/sbin/dump [netbsd-5]: snapshot.c
        src/share/man/man4 [netbsd-5]: fss.4
        src/sys/dev [netbsd-5]: fss.c fssvar.h
        src/sys/kern [netbsd-5]: vfs_wapbl.c
        src/sys/ufs/ffs [netbsd-5]: ffs_balloc.c ffs_snapshot.c
        src/usr.sbin/fssconfig [netbsd-5]: fssconfig.c

Log Message:
Pull up following revision(s) (requested by hannken in ticket #1627):
        sys/kern/vfs_wapbl.c: revisions 1.41-1.42
        sbin/dump/snapshot.c: revisions 1.6 (patch)
        share/man/man4/fss.4: revisions 1.15 (patch)
        sys/dev/fss.c: revisions 1.73 (patch)
        sys/dev/fssvar.h: revisions 1.25
        usr.sbin/fssconfig/fssconfig.c: revisions 1.7
        sys/ufs/ffs/ffs_balloc.c: revisions 1.54
        sys/ufs/ffs/ffs_snapshot.c: revisions 1.90, 1.98, 1.100-1.101, 1.103-1.110, 1.111, 1.112-1.115 (patch)

- Try to keep snapshot indirect blocks contiguous. This speeds up
  snapshot creation by a factor of ~3 and reduces the file system
  suspension time by a factor of ~5.
- Refine the scope of WAPBL transactions and the limit for deallocations
  in one transaction so we should no longer get a
  "wapbl_flush: current transaction too big to flush" panic when
  creating or removing snapshots on larger logging disks.
- fss(4): Allow FSSIOCSET to set the initial flags. Add a new flag
  "FSS_UNLINK_ON_CREATE" to unlink the backing store before the
  snapshot gets created. With this change dump(8) no longer dumps the
  zero-sized, but named snapshot it is working on.

To generate a diff of this commit:
cvs rdiff -u -r1.4 -r1.4.4.1 src/sbin/dump/snapshot.c
cvs rdiff -u -r1.12 -r1.12.2.1 src/share/man/man4/fss.4
cvs rdiff -u -r1.60.4.3 -r1.60.4.4 src/sys/dev/fss.c
cvs rdiff -u -r1.23 -r1.23.4.1 src/sys/dev/fssvar.h
cvs rdiff -u -r1.3.8.5 -r1.3.8.6 src/sys/kern/vfs_wapbl.c
cvs rdiff -u -r1.51 -r1.51.4.1 src/sys/ufs/ffs/ffs_balloc.c
cvs rdiff -u -r1.82.4.3 -r1.82.4.4 src/sys/ufs/ffs/ffs_snapshot.c
cvs rdiff -u -r1.6 -r1.6.6.1 src/usr.sbin/fssconfig/fssconfig.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files: Index: src/sbin/dump/snapshot.c diff -u src/sbin/dump/snapshot.c:1.4 src/sbin/dump/snapshot.c:1.4.4.1 --- src/sbin/dump/snapshot.c:1.4 Mon Apr 28 20:23:08 2008 +++ src/sbin/dump/snapshot.c Sat Jun 18 17:00:25 2011 @@ -1,4 +1,4 @@ -/* $NetBSD: snapshot.c,v 1.4 2008/04/28 20:23:08 martin Exp $ */ +/* $NetBSD: snapshot.c,v 1.4.4.1 2011/06/18 17:00:25 bouyer Exp $ */ /*- * Copyright (c) 2005 The NetBSD Foundation, Inc. @@ -56,7 +56,7 @@ int snap_open(char *mountpoint, char *backup, time_t *snap_date, char **snap_dev) { - int i, fd, israw, fsinternal, dounlink, flags; + int i, fd, israw, fsinternal, dounlink; char path[MAXPATHLEN], fss_dev[14]; dev_t mountdev; struct fss_set fss; @@ -118,6 +118,9 @@ if (close(fd) < 0) goto fail; + fss.fss_flags = FSS_UNCONFIG_ON_CLOSE; + if (dounlink) + fss.fss_flags |= FSS_UNLINK_ON_CREATE; /* * Create the snapshot on the first free snapshot device. */ @@ -126,9 +129,6 @@ if ((fd = open(fss_dev, O_RDWR, 0)) < 0) goto fail; - if (ioctl(fd, FSSIOFGET, &flags) < 0) - goto fail; - if (ioctl(fd, FSSIOCSET, &fss) < 0) { if (errno != EBUSY) goto fail; @@ -136,6 +136,7 @@ fd = -1; continue; } + dounlink = 0; if (snap_dev != NULL) { *snap_dev = strdup(fss_dev); @@ -145,10 +146,7 @@ } } - flags |= FSS_UNCONFIG_ON_CLOSE; - if (ioctl(fd, FSSIOCGET, &fsg) < 0 || - ioctl(fd, FSSIOFSET, &flags) < 0 || - (!israw && unlink(fss.fss_bstore) < 0)) { + if (ioctl(fd, FSSIOCGET, &fsg) < 0) { ioctl(fd, FSSIOCCLR); goto fail; } Index: src/share/man/man4/fss.4 diff -u src/share/man/man4/fss.4:1.12 src/share/man/man4/fss.4:1.12.2.1 --- src/share/man/man4/fss.4:1.12 Sun Sep 21 10:25:07 2008 +++ src/share/man/man4/fss.4 Sat Jun 18 17:00:25 2011 @@ -1,4 +1,4 @@ -.\" $NetBSD: fss.4,v 1.12 2008/09/21 10:25:07 hannken Exp $ */ +.\" $NetBSD: fss.4,v 1.12.2.1 2011/06/18 17:00:25 bouyer Exp $ */ .\" .\" .\" Copyright (c) 2003 The NetBSD Foundation, Inc. 
@@ -28,7 +28,7 @@ .\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE .\" POSSIBILITY OF SUCH DAMAGE. .\" -.Dd September 21, 2008 +.Dd February 24, 2011 .Dt FSS 4 .Os .Sh NAME @@ -65,6 +65,7 @@ char *fss_mount; char *fss_bstore; blksize_t fss_csize; + int fss_flags; }; .Ed .Pp @@ -78,6 +79,9 @@ The struct element .Va fss_csize is the preferred size of this data. +The struct element +.Va fss_flags +is the initial set of flags. .It Dv FSSIOCGET(struct fss_get) Gets the status of a .Nm @@ -120,6 +124,10 @@ Unconfigure the .Nm device on the last close. +.It Dv FSS_UNLINK_ON_CREATE +Unlink the backing file before the +.Nm +device is created. .El .It Dv FSSIOFGET(int) Gets the flags of a Index: src/sys/dev/fss.c diff -u src/sys/dev/fss.c:1.60.4.3 src/sys/dev/fss.c:1.60.4.4 --- src/sys/dev/fss.c:1.60.4.3 Sun Mar 28 17:26:26 2010 +++ src/sys/dev/fss.c Sat Jun 18 17:00:25 2011 @@ -1,4 +1,4 @@ -/* $NetBSD: fss.c,v 1.60.4.3 2010/03/28 17:26:26 snj Exp $ */ +/* $NetBSD: fss.c,v 1.60.4.4 2011/06/18 17:00:25 bouyer Exp $ */ /*- * Copyright (c) 2003 The NetBSD Foundation, Inc. @@ -36,7 +36,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: fss.c,v 1.60.4.3 2010/03/28 17:26:26 snj Exp $"); +__KERNEL_RCSID(0, "$NetBSD: fss.c,v 1.60.4.4 2011/06/18 17:00:25 bouyer Exp $"); #include <sys/param.h> #include <sys/systm.h> @@ -58,6 +58,7 @@ #include <sys/kthread.h> #include <sys/fstrans.h> #include <sys/simplelock.h> +#include <sys/vfs_syscalls.h> /* For do_sys_unlink(). */ #include <miscfs/specfs/specdev.h> @@ -301,6 +302,9 @@ struct fss_get *fsg = (struct fss_get *)data; switch (cmd) { + case FSSIOCSET50: + fss->fss_flags = 0; + /* Fall through */ case FSSIOCSET: mutex_enter(&sc->sc_lock); if ((flag & FWRITE) == 0) @@ -309,6 +313,8 @@ error = EBUSY; else error = fss_create_snapshot(sc, fss, l); + if (error == 0) + sc->sc_uflags = fss->fss_flags; mutex_exit(&sc->sc_lock); break; @@ -604,7 +610,7 @@ * Check for file system internal snapshot. 
*/ - NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, fss->fss_bstore); + NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, fss->fss_bstore); if ((error = namei(&nd)) != 0) return error; @@ -618,14 +624,20 @@ sc->sc_bs_bshift++) if (FSS_FSBSIZE(sc) == fsbsize) break; - if (sc->sc_bs_bshift >= bits) { - VOP_UNLOCK(sc->sc_bs_vp, 0); + if (sc->sc_bs_bshift >= bits) return EINVAL; - } sc->sc_bs_bmask = FSS_FSBSIZE(sc)-1; sc->sc_clshift = 0; + if ((fss->fss_flags & FSS_UNLINK_ON_CREATE) != 0) { + error = do_sys_unlink(fss->fss_bstore, UIO_USERSPACE); + if (error) + return error; + } + error = vn_lock(nd.ni_vp, LK_EXCLUSIVE); + if (error != 0) + return error; error = VFS_SNAPSHOT(sc->sc_mount, sc->sc_bs_vp, &ts); TIMESPEC_TO_TIMEVAL(&sc->sc_time, &ts); @@ -633,7 +645,7 @@ return error; } - vput(nd.ni_vp); + vrele(nd.ni_vp); /* * Get the block device it is mounted on. @@ -676,6 +688,11 @@ if (nd.ni_vp->v_type != VREG && nd.ni_vp->v_type != VCHR) return EINVAL; + if ((fss->fss_flags & FSS_UNLINK_ON_CREATE) != 0) { + error = do_sys_unlink(fss->fss_bstore, UIO_USERSPACE); + if (error) + return error; + } if (sc->sc_bs_vp->v_type == VREG) { error = VOP_GETATTR(sc->sc_bs_vp, &va, l->l_cred); if (error != 0) @@ -803,7 +820,7 @@ fss_softc_free(sc); if (sc->sc_bs_vp != NULL) { if (sc->sc_flags & FSS_PERSISTENT) - vn_close(sc->sc_bs_vp, FREAD, l->l_cred); + vrele(sc->sc_bs_vp); else vn_close(sc->sc_bs_vp, FREAD|FWRITE, l->l_cred); } @@ -830,7 +847,7 @@ fss_softc_free(sc); if (sc->sc_flags & FSS_PERSISTENT) - vn_close(sc->sc_bs_vp, FREAD, l->l_cred); + vrele(sc->sc_bs_vp); else vn_close(sc->sc_bs_vp, FREAD|FWRITE, l->l_cred); sc->sc_bs_vp = NULL; Index: src/sys/dev/fssvar.h diff -u src/sys/dev/fssvar.h:1.23 src/sys/dev/fssvar.h:1.23.4.1 --- src/sys/dev/fssvar.h:1.23 Wed Sep 17 14:49:25 2008 +++ src/sys/dev/fssvar.h Sat Jun 18 17:00:25 2011 @@ -1,4 +1,4 @@ -/* $NetBSD: fssvar.h,v 1.23 2008/09/17 14:49:25 hannken Exp $ */ +/* $NetBSD: fssvar.h,v 1.23.4.1 2011/06/18 17:00:25 bouyer Exp 
$ */ /*- * Copyright (c) 2003, 2007 The NetBSD Foundation, Inc. @@ -35,11 +35,13 @@ #include <sys/simplelock.h> #define FSS_UNCONFIG_ON_CLOSE 0x01 /* Unconfigure on last close */ +#define FSS_UNLINK_ON_CREATE 0x02 /* Unlink backing store on create */ struct fss_set { char *fss_mount; /* Mount point of file system */ char *fss_bstore; /* Path of backing store */ blksize_t fss_csize; /* Preferred cluster size */ + int fss_flags; /* Initial flags */ }; struct fss_get { @@ -50,11 +52,12 @@ blkcnt_t fsg_bs_size; /* # clusters on backing store */ }; -#define FSSIOCSET _IOW('F', 0, struct fss_set) /* Configure */ +#define FSSIOCSET _IOW('F', 5, struct fss_set) /* Configure */ #define FSSIOCGET _IOR('F', 1, struct fss_get) /* Status */ #define FSSIOCCLR _IO('F', 2) /* Unconfigure */ #define FSSIOFSET _IOW('F', 3, int) /* Set flags */ #define FSSIOFGET _IOR('F', 4, int) /* Get flags */ +#define FSSIOCSET50 _IOW('F', 0, struct fss_set) /* Old configure */ #ifdef _KERNEL Index: src/sys/kern/vfs_wapbl.c diff -u src/sys/kern/vfs_wapbl.c:1.3.8.5 src/sys/kern/vfs_wapbl.c:1.3.8.6 --- src/sys/kern/vfs_wapbl.c:1.3.8.5 Mon Mar 7 04:09:28 2011 +++ src/sys/kern/vfs_wapbl.c Sat Jun 18 17:00:25 2011 @@ -1,4 +1,4 @@ -/* $NetBSD: vfs_wapbl.c,v 1.3.8.5 2011/03/07 04:09:28 riz Exp $ */ +/* $NetBSD: vfs_wapbl.c,v 1.3.8.6 2011/06/18 17:00:25 bouyer Exp $ */ /*- * Copyright (c) 2003, 2008, 2009 The NetBSD Foundation, Inc. @@ -33,7 +33,7 @@ * This implements file system independent write ahead filesystem logging. 
*/ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: vfs_wapbl.c,v 1.3.8.5 2011/03/07 04:09:28 riz Exp $"); +__KERNEL_RCSID(0, "$NetBSD: vfs_wapbl.c,v 1.3.8.6 2011/06/18 17:00:25 bouyer Exp $"); #include <sys/param.h> @@ -401,7 +401,7 @@ wl->wl_bufcount_max = (nbuf / 2) * 1024; /* XXX tie this into resource estimation */ - wl->wl_dealloclim = 2 * btodb(wl->wl_bufbytes_max); + wl->wl_dealloclim = wl->wl_bufbytes_max / mp->mnt_stat.f_bsize / 2; #if WAPBL_UVM_ALLOC wl->wl_deallocblks = (void *) uvm_km_zalloc(kernel_map, @@ -830,8 +830,7 @@ ((wl->wl_bufcount + (lockcount * 10)) > wl->wl_bufcount_max / 2) || (wapbl_transaction_len(wl) > wl->wl_circ_size / 2) || - (wl->wl_dealloccnt >= - (wl->wl_dealloclim - (wl->wl_dealloclim >> 8))); + (wl->wl_dealloccnt >= (wl->wl_dealloclim / 2)); mutex_exit(&wl->wl_mtx); if (doflush) { Index: src/sys/ufs/ffs/ffs_balloc.c diff -u src/sys/ufs/ffs/ffs_balloc.c:1.51 src/sys/ufs/ffs/ffs_balloc.c:1.51.4.1 --- src/sys/ufs/ffs/ffs_balloc.c:1.51 Thu Jul 31 05:38:06 2008 +++ src/sys/ufs/ffs/ffs_balloc.c Sat Jun 18 17:00:25 2011 @@ -1,4 +1,4 @@ -/* $NetBSD: ffs_balloc.c,v 1.51 2008/07/31 05:38:06 simonb Exp $ */ +/* $NetBSD: ffs_balloc.c,v 1.51.4.1 2011/06/18 17:00:25 bouyer Exp $ */ /* * Copyright (c) 2002 Networks Associates Technology, Inc. @@ -41,7 +41,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: ffs_balloc.c,v 1.51 2008/07/31 05:38:06 simonb Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ffs_balloc.c,v 1.51.4.1 2011/06/18 17:00:25 bouyer Exp $"); #if defined(_KERNEL_OPT) #include "opt_quota.h" @@ -340,6 +340,10 @@ goto fail; } mutex_enter(&ump->um_lock); + /* Try to keep snapshot indirect blocks contiguous. 
*/ + if (i == num && (ip->i_flags & SF_SNAPSHOT) != 0) + pref = ffs_blkpref_ufs1(ip, lbn, indirs[i-1].in_off, + flags | B_METAONLY, &bap[0]); if (pref == 0) pref = ffs_blkpref_ufs1(ip, lbn, 0, flags | B_METAONLY, NULL); @@ -932,6 +936,10 @@ goto fail; } mutex_enter(&ump->um_lock); + /* Try to keep snapshot indirect blocks contiguous. */ + if (i == num && (ip->i_flags & SF_SNAPSHOT) != 0) + pref = ffs_blkpref_ufs2(ip, lbn, indirs[i-1].in_off, + flags | B_METAONLY, &bap[0]); if (pref == 0) pref = ffs_blkpref_ufs2(ip, lbn, 0, flags | B_METAONLY, NULL); Index: src/sys/ufs/ffs/ffs_snapshot.c diff -u src/sys/ufs/ffs/ffs_snapshot.c:1.82.4.3 src/sys/ufs/ffs/ffs_snapshot.c:1.82.4.4 --- src/sys/ufs/ffs/ffs_snapshot.c:1.82.4.3 Sun Mar 28 17:28:33 2010 +++ src/sys/ufs/ffs/ffs_snapshot.c Sat Jun 18 17:00:26 2011 @@ -1,4 +1,4 @@ -/* $NetBSD: ffs_snapshot.c,v 1.82.4.3 2010/03/28 17:28:33 snj Exp $ */ +/* $NetBSD: ffs_snapshot.c,v 1.82.4.4 2011/06/18 17:00:26 bouyer Exp $ */ /* * Copyright 2000 Marshall Kirk McKusick. All Rights Reserved. 
@@ -38,7 +38,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: ffs_snapshot.c,v 1.82.4.3 2010/03/28 17:28:33 snj Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ffs_snapshot.c,v 1.82.4.4 2011/06/18 17:00:26 bouyer Exp $"); #if defined(_KERNEL_OPT) #include "opt_ffs.h" @@ -79,6 +79,7 @@ struct snap_info { kmutex_t si_lock; /* Lock this snapinfo */ kmutex_t si_snaplock; /* Snapshot vnode common lock */ + lwp_t *si_owner; /* Sanplock owner */ TAILQ_HEAD(inodelst, inode) si_snapshots; /* List of active snapshots */ daddr_t *si_snapblklist; /* Snapshot block hints list */ uint32_t si_gen; /* Incremented on change */ @@ -114,6 +115,9 @@ static int rwfsblk(struct vnode *, int, void *, daddr_t); static int syncsnap(struct vnode *); static int wrsnapblk(struct vnode *, void *, daddr_t); +#if !defined(FFS_NO_SNAPSHOT) +static int blocks_in_journal(struct fs *); +#endif static inline bool is_active_snapshot(struct snap_info *, struct inode *); static inline daddr_t db_get(struct inode *, int); @@ -139,6 +143,7 @@ TAILQ_INIT(&si->si_snapshots); mutex_init(&si->si_lock, MUTEX_DEFAULT, IPL_NONE); mutex_init(&si->si_snaplock, MUTEX_DEFAULT, IPL_NONE); + si->si_owner = NULL; si->si_gen = 0; si->si_snapblklist = NULL; @@ -172,7 +177,6 @@ } #else /* defined(FFS_NO_SNAPSHOT) */ bool suspended = false; - bool snapshot_locked = false; int error, redo = 0, snaploc; void *sbbuf = NULL; daddr_t *snaplist = NULL, snaplistsize = 0; @@ -213,12 +217,6 @@ if (error) goto out; /* - * Change inode to snapshot type file. - */ - ip->i_flags |= SF_SNAPSHOT; - DIP_ASSIGN(ip, flags, ip->i_flags); - ip->i_flag |= IN_CHANGE | IN_UPDATE; - /* * Copy all the cylinder group maps. Although the * filesystem is still active, we hope that only a few * cylinder groups will change between now and when we @@ -267,15 +265,11 @@ if (error) goto out; /* - * Acquire the snapshot lock. - */ - mutex_enter(&si->si_snaplock); - snapshot_locked = true; - /* * Record snapshot inode. 
Since this is the newest snapshot, * it must be placed at the end of the list. */ - fs->fs_snapinum[snaploc] = ip->i_number; + if (ip->i_ffs_effnlink > 0) + fs->fs_snapinum[snaploc] = ip->i_number; mutex_enter(&si->si_lock); if (is_active_snapshot(si, ip)) @@ -374,15 +368,15 @@ si->si_gen++; mutex_exit(&si->si_lock); - if (snapshot_locked) - mutex_exit(&si->si_snaplock); if (suspended) { + VOP_UNLOCK(vp, 0); vfs_resume(vp->v_mount); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); #ifdef DEBUG getmicrotime(&endtime); timersub(&endtime, &starttime, &endtime); - printf("%s: suspended %ld.%03ld sec, redo %d of %d\n", - mp->mnt_stat.f_mntonname, (long)endtime.tv_sec, + printf("%s: suspended %lld.%03ld sec, redo %d of %d\n", + mp->mnt_stat.f_mntonname, (long long)endtime.tv_sec, endtime.tv_usec / 1000, redo, fs->fs_ncg); #endif } @@ -391,7 +385,7 @@ (void) ffs_truncate(vp, (off_t)0, 0, NOCRED); UFS_WAPBL_END(mp); } - } else + } else if (ip->i_ffs_effnlink > 0) vref(vp); return (error); } @@ -402,11 +396,13 @@ static int snapshot_setup(struct mount *mp, struct vnode *vp) { - int error, i, len, loc; + int error, n, len, loc, cg; daddr_t blkno, numblks; struct buf *ibp, *nbp; struct fs *fs = VFSTOUFS(mp)->um_fs; struct lwp *l = curlwp; + const int wbreak = blocks_in_journal(fs)/8; + struct inode *ip = VTOI(vp); /* * Check mount, exclusive reference and owner. @@ -425,6 +421,11 @@ if (error) return error; } + + /* Change inode to snapshot type file. */ + ip->i_flags |= SF_SNAPSHOT; + DIP_ASSIGN(ip, flags, ip->i_flags); + ip->i_flag |= IN_CHANGE | IN_UPDATE; /* * Write an empty list of preallocated blocks to the end of * the snapshot to set size to at least that of the filesystem. 
@@ -451,7 +452,7 @@ error = UFS_WAPBL_BEGIN(mp); if (error) return error; - for (blkno = NDADDR, i = 0; blkno < numblks; blkno += NINDIR(fs)) { + for (blkno = NDADDR, n = 0; blkno < numblks; blkno += NINDIR(fs)) { error = ffs_balloc(vp, lblktosize(fs, (off_t)blkno), fs->fs_bsize, l->l_cred, B_METAONLY, &ibp); if (error) @@ -460,7 +461,7 @@ bawrite(ibp); else brelse(ibp, 0); - if ((++i % 16) == 0) { + if (wbreak > 0 && (++n % wbreak) == 0) { UFS_WAPBL_END(mp); error = UFS_WAPBL_BEGIN(mp); if (error) @@ -483,6 +484,28 @@ if (error) goto out; bawrite(nbp); + if (wbreak > 0 && (++n % wbreak) == 0) { + UFS_WAPBL_END(mp); + error = UFS_WAPBL_BEGIN(mp); + if (error) + return error; + } + } + /* + * Allocate all cylinder group blocks. + */ + for (cg = 0; cg < fs->fs_ncg; cg++) { + error = ffs_balloc(vp, lfragtosize(fs, cgtod(fs, cg)), + fs->fs_bsize, l->l_cred, 0, &nbp); + if (error) + goto out; + bawrite(nbp); + if (wbreak > 0 && (++n % wbreak) == 0) { + UFS_WAPBL_END(mp); + error = UFS_WAPBL_BEGIN(mp); + if (error) + return error; + } } out: @@ -563,8 +586,7 @@ snapshot_expunge(struct mount *mp, struct vnode *vp, struct fs *copy_fs, daddr_t *snaplistsize, daddr_t **snaplist) { - bool has_wapbl = false; - int cg, error, len, loc; + int cg, error = 0, len, loc; daddr_t blkno, *blkp; struct fs *fs = VFSTOUFS(mp)->um_fs; struct inode *xp; @@ -595,10 +617,6 @@ */ *snaplistsize = fs->fs_ncg + howmany(fs->fs_cssize, fs->fs_bsize) + FSMAXSNAP + 1 /* superblock */ + 1 /* last block */ + 1 /* size */; - error = UFS_WAPBL_BEGIN(mp); - if (error) - goto out; - has_wapbl = true; mutex_enter(&mntvnode_lock); /* * NOTE: not using the TAILQ_FOREACH here since in this loop vgone() @@ -650,19 +668,30 @@ if (loc < NDADDR) { len = fragroundup(fs, blkoff(fs, xp->i_size)); if (len > 0 && len < fs->fs_bsize) { + error = UFS_WAPBL_BEGIN(mp); + if (error) { + (void)vunmark(mvp); + goto out; + } ffs_blkfree(copy_fs, vp, db_get(xp, loc), len, xp->i_number); blkno = db_get(xp, loc); db_assign(xp, 
loc, 0); + UFS_WAPBL_END(mp); } } *snaplistsize += 1; error = expunge(vp, xp, copy_fs, fullacct, BLK_NOCOPY); if (blkno) db_assign(xp, loc, blkno); - if (!error) - error = ffs_freefile(copy_fs, vp, xp->i_number, - xp->i_mode); + if (!error) { + error = UFS_WAPBL_BEGIN(mp); + if (!error) { + error = ffs_freefile(copy_fs, vp, + xp->i_number, xp->i_mode); + UFS_WAPBL_END(mp); + } + } if (error) { (void)vunmark(mvp); goto out; @@ -687,10 +716,9 @@ *blkp++ = blkno + loc; for (; cg < fs->fs_ncg; cg++) *blkp++ = fragstoblks(fs, cgtod(fs, cg)); + (*snaplist)[0] = blkp - &(*snaplist)[0]; out: - if (has_wapbl) - UFS_WAPBL_END(mp); if (mvp != NULL) vnfree(mvp); if (logvp != NULL) @@ -712,25 +740,26 @@ snapshot_expunge_snap(struct mount *mp, struct vnode *vp, struct fs *copy_fs, daddr_t snaplistsize) { - int error, i; + int error = 0, i; daddr_t numblks, *snaplist = NULL; struct fs *fs = VFSTOUFS(mp)->um_fs; struct inode *ip = VTOI(vp), *xp; struct lwp *l = curlwp; struct snap_info *si = VFSTOUFS(mp)->um_snapinfo; - error = UFS_WAPBL_BEGIN(mp); - if (error) - return error; TAILQ_FOREACH(xp, &si->si_snapshots, i_nextsnap) { - if (xp == ip) - break; - error = expunge(vp, xp, fs, snapacct, BLK_SNAP); - if (error) - break; + if (xp != ip) { + error = expunge(vp, xp, fs, snapacct, BLK_SNAP); + if (error) + break; + } if (xp->i_ffs_effnlink != 0) continue; + error = UFS_WAPBL_BEGIN(mp); + if (error) + break; error = ffs_freefile(copy_fs, vp, xp->i_number, xp->i_mode); + UFS_WAPBL_END(mp); if (error) break; } @@ -762,12 +791,10 @@ snaplist[i] = ufs_rw64(snaplist[i], UFS_FSNEEDSWAP(fs)); error = vn_rdwr(UIO_WRITE, vp, (void *)snaplist, snaplistsize * sizeof(daddr_t), lblktosize(fs, (off_t)numblks), - UIO_SYSSPACE, IO_NODELOCKED | IO_JOURNALLOCKED | IO_UNIT, - l->l_cred, NULL, NULL); + UIO_SYSSPACE, IO_NODELOCKED | IO_UNIT, l->l_cred, NULL, NULL); for (i = 0; i < snaplistsize; i++) snaplist[i] = ufs_rw64(snaplist[i], UFS_FSNEEDSWAP(fs)); out: - UFS_WAPBL_END(mp); if (error && snaplist 
!= NULL) { free(snaplist, M_UFSMNT); ip->i_snapblklist = NULL; @@ -860,13 +887,10 @@ static int cgaccount(struct vnode *vp, int passno, int *redo) { - int cg, error; + int cg, error = 0; struct buf *nbp; struct fs *fs = VTOI(vp)->i_fs; - error = UFS_WAPBL_BEGIN(vp->v_mount); - if (error) - return error; if (redo != NULL) *redo = 0; if (passno == 1) @@ -875,18 +899,24 @@ for (cg = 0; cg < fs->fs_ncg; cg++) { if (passno == 2 && ACTIVECG_ISSET(fs, cg)) continue; + if (redo != NULL) *redo += 1; + error = UFS_WAPBL_BEGIN(vp->v_mount); + if (error) + return error; error = ffs_balloc(vp, lfragtosize(fs, cgtod(fs, cg)), fs->fs_bsize, curlwp->l_cred, 0, &nbp); - if (error) + if (error) { + UFS_WAPBL_END(vp->v_mount); break; + } error = cgaccount1(cg, vp, nbp->b_data, passno); bawrite(nbp); + UFS_WAPBL_END(vp->v_mount); if (error) break; } - UFS_WAPBL_END(vp->v_mount); return error; } @@ -993,8 +1023,14 @@ struct lwp *l = curlwp; void *bap; struct buf *bp; + struct mount *mp; ns = UFS_FSNEEDSWAP(fs); + mp = snapvp->v_mount; + + error = UFS_WAPBL_BEGIN(mp); + if (error) + return error; /* * Prepare to expunge the inode. If its inode block has not * yet been copied, then allocate and fill the copy. @@ -1012,8 +1048,10 @@ if (! error) error = rwfsblk(snapvp, B_READ, bp->b_data, lbn); } - if (error) + if (error) { + UFS_WAPBL_END(mp); return error; + } /* * Set a snapshot inode to be a zero length file, regular files * or unlinked snapshots to be completely unallocated. @@ -1040,6 +1078,7 @@ bzero(&dip2->di_db[0], (NDADDR + NIADDR) * sizeof(int64_t)); } bdwrite(bp); + UFS_WAPBL_END(mp); /* * Now go through and expunge all the blocks in the file * using the function requested. 
@@ -1049,13 +1088,15 @@ bap = &cancelip->i_ffs1_db[0]; else bap = &cancelip->i_ffs2_db[0]; - if ((error = (*acctfunc)(snapvp, bap, 0, NDADDR, fs, 0, expungetype))) + error = (*acctfunc)(snapvp, bap, 0, NDADDR, fs, 0, expungetype); + if (error) return (error); if (fs->fs_magic == FS_UFS1_MAGIC) bap = &cancelip->i_ffs1_ib[0]; else bap = &cancelip->i_ffs2_ib[0]; - if ((error = (*acctfunc)(snapvp, bap, 0, NIADDR, fs, -1, expungetype))) + error = (*acctfunc)(snapvp, bap, 0, NIADDR, fs, -1, expungetype); + if (error) return (error); blksperindir = 1; lbn = -NDADDR; @@ -1171,12 +1212,17 @@ { struct inode *ip = VTOI(vp); struct lwp *l = curlwp; + struct mount *mp = vp->v_mount; daddr_t blkno; daddr_t lbn; struct buf *ibp; - int error; + int error, n; + const int wbreak = blocks_in_journal(VFSTOUFS(mp)->um_fs)/8; - for ( ; oldblkp < lastblkp; oldblkp++) { + error = UFS_WAPBL_BEGIN(mp); + if (error) + return error; + for ( n = 0; oldblkp < lastblkp; oldblkp++) { blkno = idb_get(ip, bap, oldblkp); if (blkno == 0 || blkno == BLK_NOCOPY || blkno == BLK_SNAP) continue; @@ -1188,7 +1234,7 @@ error = ffs_balloc(vp, lblktosize(fs, (off_t)lbn), fs->fs_bsize, l->l_cred, B_METAONLY, &ibp); if (error) - return (error); + break; blkno = idb_get(ip, ibp->b_data, (lbn - NDADDR) % NINDIR(fs)); } @@ -1212,8 +1258,15 @@ bdwrite(ibp); } } + if (wbreak > 0 && (++n % wbreak) == 0) { + UFS_WAPBL_END(mp); + error = UFS_WAPBL_BEGIN(mp); + if (error) + return error; + } } - return (0); + UFS_WAPBL_END(mp); + return error; } /* @@ -1225,16 +1278,21 @@ { daddr_t blkno; struct inode *ip; + struct mount *mp = vp->v_mount; ino_t inum; - int acctit; + int acctit, error, n; + const int wbreak = blocks_in_journal(VFSTOUFS(mp)->um_fs)/8; + error = UFS_WAPBL_BEGIN(mp); + if (error) + return error; ip = VTOI(vp); inum = ip->i_number; if (lblkno == -1) acctit = 0; else acctit = 1; - for ( ; oldblkp < lastblkp; oldblkp++, lblkno++) { + for ( n = 0; oldblkp < lastblkp; oldblkp++, lblkno++) { blkno = idb_get(ip, 
bap, oldblkp); if (blkno == 0 || blkno == BLK_NOCOPY) continue; @@ -1243,9 +1301,43 @@ if (blkno == BLK_SNAP) blkno = blkstofrags(fs, lblkno); ffs_blkfree(fs, vp, blkno, fs->fs_bsize, inum); + if (wbreak > 0 && (++n % wbreak) == 0) { + UFS_WAPBL_END(mp); + error = UFS_WAPBL_BEGIN(mp); + if (error) + return error; + } } + UFS_WAPBL_END(mp); return (0); } + +/* + * Number of blocks that fit into the journal or zero if not logging. + */ +static int +blocks_in_journal(struct fs *fs) +{ + off_t bpj; + + if ((fs->fs_flags & FS_DOWAPBL) == 0) + return 0; + bpj = 1; + if (fs->fs_journal_version == UFS_WAPBL_VERSION) { + switch (fs->fs_journal_location) { + case UFS_WAPBL_JOURNALLOC_END_PARTITION: + bpj = (off_t)fs->fs_journallocs[UFS_WAPBL_EPART_BLKSZ]* + fs->fs_journallocs[UFS_WAPBL_EPART_COUNT]; + break; + case UFS_WAPBL_JOURNALLOC_IN_FILESYSTEM: + bpj = (off_t)fs->fs_journallocs[UFS_WAPBL_INFS_BLKSZ]* + fs->fs_journallocs[UFS_WAPBL_INFS_COUNT]; + break; + } + } + bpj /= fs->fs_bsize; + return (bpj > 0 ? bpj : 1); +} #endif /* defined(FFS_NO_SNAPSHOT) */ /* @@ -1322,6 +1414,7 @@ * * Clear copy-on-write flag if last snapshot. */ + mutex_enter(&si->si_snaplock); mutex_enter(&si->si_lock); if (is_active_snapshot(si, ip)) { TAILQ_REMOVE(&si->si_snapshots, ip, i_nextsnap); @@ -1331,18 +1424,22 @@ si->si_snapblklist = xp->i_snapblklist; si->si_gen++; mutex_exit(&si->si_lock); + mutex_exit(&si->si_snaplock); } else { si->si_snapblklist = 0; si->si_gen++; mutex_exit(&si->si_lock); + mutex_exit(&si->si_snaplock); fscow_disestablish(mp, ffs_copyonwrite, devvp); } if (ip->i_snapblklist != NULL) { free(ip->i_snapblklist, M_UFSMNT); ip->i_snapblklist = NULL; } - } else + } else { mutex_exit(&si->si_lock); + mutex_exit(&si->si_snaplock); + } /* * Clear all BLK_NOCOPY fields. Pass any block claims to other * snapshots that want them (see ffs_snapblkfree below). 
@@ -1380,6 +1477,9 @@ } } bawrite(ibp); + UFS_WAPBL_END(mp); + error = UFS_WAPBL_BEGIN(mp); + KASSERT(error == 0); } /* * Clear snapshot flag and drop reference. @@ -1420,25 +1520,18 @@ daddr_t lbn; daddr_t blkno; uint32_t gen; - int indiroff = 0, snapshot_locked = 0, error = 0, claimedblk = 0; + int indiroff = 0, error = 0, claimedblk = 0; si = VFSTOUFS(mp)->um_snapinfo; lbn = fragstoblks(fs, bno); + mutex_enter(&si->si_snaplock); mutex_enter(&si->si_lock); + si->si_owner = curlwp; + retry: gen = si->si_gen; TAILQ_FOREACH(ip, &si->si_snapshots, i_nextsnap) { vp = ITOV(ip); - if (snapshot_locked == 0) { - if (!mutex_tryenter(&si->si_snaplock)) { - mutex_exit(&si->si_lock); - mutex_enter(&si->si_snaplock); - mutex_enter(&si->si_lock); - } - snapshot_locked = 1; - if (gen != si->si_gen) - goto retry; - } /* * Lookup block being written. */ @@ -1535,6 +1628,9 @@ error = syncsnap(vp); else error = 0; + mutex_enter(&si->si_lock); + si->si_owner = NULL; + mutex_exit(&si->si_lock); mutex_exit(&si->si_snaplock); return (error == 0); } @@ -1574,7 +1670,9 @@ if (gen != si->si_gen) goto retry; } + si->si_owner = NULL; mutex_exit(&si->si_lock); + mutex_exit(&si->si_snaplock); if (saved_data) free(saved_data, M_UFSMNT); /* @@ -1583,8 +1681,6 @@ * not be freed. Although space will be lost, the snapshot * will stay consistent. */ - if (snapshot_locked) - mutex_exit(&si->si_snaplock); return (error); } @@ -1723,12 +1819,11 @@ mutex_enter(&si->si_lock); while ((xp = TAILQ_FIRST(&si->si_snapshots)) != 0) { vp = ITOV(xp); - vp->v_vnlock = &vp->v_lock; TAILQ_REMOVE(&si->si_snapshots, xp, i_nextsnap); if (xp->i_snapblklist == si->si_snapblklist) si->si_snapblklist = NULL; FREE(xp->i_snapblklist, M_UFSMNT); - if (xp->i_ffs_effnlink > 0) { + if (xp->i_ffs_effnlink != 0) { si->si_gen++; mutex_exit(&si->si_lock); vrele(vp); @@ -1798,6 +1893,15 @@ /* * Not in the precomputed list, so check the snapshots. 
*/ + if (si->si_owner != curlwp) { + if (!mutex_tryenter(&si->si_snaplock)) { + mutex_exit(&si->si_lock); + mutex_enter(&si->si_snaplock); + mutex_enter(&si->si_lock); + } + si->si_owner = curlwp; + snapshot_locked = 1; + } if (data_valid && bp->b_bcount == fs->fs_bsize) saved_data = bp->b_data; retry: @@ -1819,6 +1923,7 @@ blkno = db_get(ip, lbn); } else { mutex_exit(&si->si_lock); + blkno = 0; /* XXX: GCC */ if ((error = snapblkaddr(vp, lbn, &blkno)) != 0) { mutex_enter(&si->si_lock); break; @@ -1838,34 +1943,8 @@ error = ENOMEM; break; } - - if (snapshot_locked == 0) { - if (!mutex_tryenter(&si->si_snaplock)) { - mutex_exit(&si->si_lock); - mutex_enter(&si->si_snaplock); - mutex_enter(&si->si_lock); - } - snapshot_locked = 1; - if (gen != si->si_gen) - goto retry; - - /* Check again if block still needs to be copied */ - if (lbn < NDADDR) { - blkno = db_get(ip, lbn); - } else { - mutex_exit(&si->si_lock); - if ((error = snapblkaddr(vp, lbn, &blkno)) != 0) { - mutex_enter(&si->si_lock); - break; - } - mutex_enter(&si->si_lock); - if (gen != si->si_gen) - goto retry; - } - - if (blkno != 0) - continue; - } + /* Only one level of recursion allowed. */ + KASSERT(snapshot_locked); /* * Allocate the block into which to do the copy. Since * multiple processes may all try to copy the same block, @@ -1920,11 +1999,14 @@ * have not been unlinked, and hence will be visible after * a crash, to ensure their integrity. 
*/ - mutex_exit(&si->si_lock); + if (snapshot_locked) { + si->si_owner = NULL; + mutex_exit(&si->si_lock); + mutex_exit(&si->si_snaplock); + } else + mutex_exit(&si->si_lock); if (saved_data && saved_data != bp->b_data) free(saved_data, M_UFSMNT); - if (snapshot_locked) - mutex_exit(&si->si_snaplock); return error; } @@ -2084,9 +2166,14 @@ mutex_enter(&bufcache_lock); while ((bp = LIST_FIRST(&vp->v_dirtyblkhd))) { - KASSERT((bp->b_cflags & BC_BUSY) == 0); + error = bbusy(bp, false, 0, NULL); + if (error == EPASSTHROUGH) + continue; + else if (error != 0) { + mutex_exit(&bufcache_lock); + return error; + } KASSERT(bp->b_bcount == fs->fs_bsize); - bp->b_cflags |= BC_BUSY; mutex_exit(&bufcache_lock); error = rwfsblk(vp, B_WRITE, bp->b_data, fragstoblks(fs, dbtofsb(fs, bp->b_blkno))); Index: src/usr.sbin/fssconfig/fssconfig.c diff -u src/usr.sbin/fssconfig/fssconfig.c:1.6 src/usr.sbin/fssconfig/fssconfig.c:1.6.6.1 --- src/usr.sbin/fssconfig/fssconfig.c:1.6 Mon Apr 28 20:24:16 2008 +++ src/usr.sbin/fssconfig/fssconfig.c Sat Jun 18 17:00:26 2011 @@ -1,4 +1,4 @@ -/* $NetBSD: fssconfig.c,v 1.6 2008/04/28 20:24:16 martin Exp $ */ +/* $NetBSD: fssconfig.c,v 1.6.6.1 2011/06/18 17:00:26 bouyer Exp $ */ /*- * Copyright (c) 2003 The NetBSD Foundation, Inc. @@ -168,15 +168,16 @@ err(1, "open: %s", argv[0]); } + if ((xflag || istmp) && isreg) + fss.fss_flags |= FSS_UNLINK_ON_CREATE; + else + fss.fss_flags = 0; if (ioctl(fd, FSSIOCSET, &fss) < 0) { if (istmp) unlink(fss.fss_bstore); err(1, "%s: FSSIOCSET", full); } - if ((xflag || istmp) && isreg && unlink(fss.fss_bstore) < 0) - err(1, "unlink: %s", fss.fss_bstore); - if (vflag) list(1, argv); }