Module Name:    src
Committed By:   bouyer
Date:           Sat Jun 18 17:00:26 UTC 2011

Modified Files:
        src/sbin/dump [netbsd-5]: snapshot.c
        src/share/man/man4 [netbsd-5]: fss.4
        src/sys/dev [netbsd-5]: fss.c fssvar.h
        src/sys/kern [netbsd-5]: vfs_wapbl.c
        src/sys/ufs/ffs [netbsd-5]: ffs_balloc.c ffs_snapshot.c
        src/usr.sbin/fssconfig [netbsd-5]: fssconfig.c

Log Message:
Pull up following revision(s) (requested by hannken in ticket #1627):
        sys/kern/vfs_wapbl.c: revisions                 1.41-1.42
        sbin/dump/snapshot.c: revisions                 1.6 (patch)
        share/man/man4/fss.4: revisions                 1.15 (patch)
        sys/dev/fss.c: revisions                        1.73 (patch)
        sys/dev/fssvar.h: revisions                     1.25
        usr.sbin/fssconfig/fssconfig.c: revisions       1.7
        sys/ufs/ffs/ffs_balloc.c: revisions             1.54
        sys/ufs/ffs/ffs_snapshot.c: revisions           1.90, 1.98,
                1.100-1.101, 1.103-1.110, 1.111, 1.112-1.115 (patch)

- Try to keep snapshot indirect blocks contiguous.  This speeds up snapshot
  creation by a factor of ~3 and reduces the file system suspension time by
  a factor of ~5.

- Refine the scope of WAPBL transactions and the limit for deallocations in
  one transaction so we should no longer get a "wapbl_flush: current
  transaction too big to flush" panic when creating or removing snapshots
  on larger logging disks.

- fss(4): Allow FSSIOCSET to set the initial flags.  Add a new flag
  "FSS_UNLINK_ON_CREATE" to unlink the backing store before the snapshot
  gets created.  With this change, dump(8) no longer dumps the zero-sized
  but still named snapshot file it is working on.


To generate a diff of this commit:
cvs rdiff -u -r1.4 -r1.4.4.1 src/sbin/dump/snapshot.c
cvs rdiff -u -r1.12 -r1.12.2.1 src/share/man/man4/fss.4
cvs rdiff -u -r1.60.4.3 -r1.60.4.4 src/sys/dev/fss.c
cvs rdiff -u -r1.23 -r1.23.4.1 src/sys/dev/fssvar.h
cvs rdiff -u -r1.3.8.5 -r1.3.8.6 src/sys/kern/vfs_wapbl.c
cvs rdiff -u -r1.51 -r1.51.4.1 src/sys/ufs/ffs/ffs_balloc.c
cvs rdiff -u -r1.82.4.3 -r1.82.4.4 src/sys/ufs/ffs/ffs_snapshot.c
cvs rdiff -u -r1.6 -r1.6.6.1 src/usr.sbin/fssconfig/fssconfig.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sbin/dump/snapshot.c
diff -u src/sbin/dump/snapshot.c:1.4 src/sbin/dump/snapshot.c:1.4.4.1
--- src/sbin/dump/snapshot.c:1.4	Mon Apr 28 20:23:08 2008
+++ src/sbin/dump/snapshot.c	Sat Jun 18 17:00:25 2011
@@ -1,4 +1,4 @@
-/*	$NetBSD: snapshot.c,v 1.4 2008/04/28 20:23:08 martin Exp $	*/
+/*	$NetBSD: snapshot.c,v 1.4.4.1 2011/06/18 17:00:25 bouyer Exp $	*/
 
 /*-
  * Copyright (c) 2005 The NetBSD Foundation, Inc.
@@ -56,7 +56,7 @@
 int
 snap_open(char *mountpoint, char *backup, time_t *snap_date, char **snap_dev)
 {
-	int i, fd, israw, fsinternal, dounlink, flags;
+	int i, fd, israw, fsinternal, dounlink;
 	char path[MAXPATHLEN], fss_dev[14];
 	dev_t mountdev;
 	struct fss_set fss;
@@ -118,6 +118,9 @@
 	if (close(fd) < 0)
 		goto fail;
 
+	fss.fss_flags = FSS_UNCONFIG_ON_CLOSE;
+	if (dounlink)
+		fss.fss_flags |= FSS_UNLINK_ON_CREATE;
 	/*
 	 * Create the snapshot on the first free snapshot device.
 	 */
@@ -126,9 +129,6 @@
 		if ((fd = open(fss_dev, O_RDWR, 0)) < 0)
 			goto fail;
 
-		if (ioctl(fd, FSSIOFGET, &flags) < 0)
-			goto fail;
-
 		if (ioctl(fd, FSSIOCSET, &fss) < 0) {
 			if (errno != EBUSY)
 				goto fail;
@@ -136,6 +136,7 @@
 			fd = -1;
 			continue;
 		}
+		dounlink = 0;
 
 		if (snap_dev != NULL) {
 			*snap_dev = strdup(fss_dev);
@@ -145,10 +146,7 @@
 			}
 		}
 
-		flags |= FSS_UNCONFIG_ON_CLOSE;
-		if (ioctl(fd, FSSIOCGET, &fsg) < 0 ||
-		    ioctl(fd, FSSIOFSET, &flags) < 0 ||
-		    (!israw && unlink(fss.fss_bstore) < 0)) {
+		if (ioctl(fd, FSSIOCGET, &fsg) < 0) {
 			ioctl(fd, FSSIOCCLR);
 			goto fail;
 		}

Index: src/share/man/man4/fss.4
diff -u src/share/man/man4/fss.4:1.12 src/share/man/man4/fss.4:1.12.2.1
--- src/share/man/man4/fss.4:1.12	Sun Sep 21 10:25:07 2008
+++ src/share/man/man4/fss.4	Sat Jun 18 17:00:25 2011
@@ -1,4 +1,4 @@
-.\"	$NetBSD: fss.4,v 1.12 2008/09/21 10:25:07 hannken Exp $	*/
+.\"	$NetBSD: fss.4,v 1.12.2.1 2011/06/18 17:00:25 bouyer Exp $	*/
 .\"
 .\"
 .\" Copyright (c) 2003 The NetBSD Foundation, Inc.
@@ -28,7 +28,7 @@
 .\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 .\" POSSIBILITY OF SUCH DAMAGE.
 .\"
-.Dd September 21, 2008
+.Dd February 24, 2011
 .Dt FSS 4
 .Os
 .Sh NAME
@@ -65,6 +65,7 @@
 	char *fss_mount;
 	char *fss_bstore;
 	blksize_t fss_csize;
+	int fss_flags;
 };
 .Ed
 .Pp
@@ -78,6 +79,9 @@
 The struct element
 .Va fss_csize
 is the preferred size of this data.
+The struct element
+.Va fss_flags
+is the initial set of flags.
 .It Dv FSSIOCGET(struct fss_get)
 Gets the status of a
 .Nm
@@ -120,6 +124,10 @@
 Unconfigure the
 .Nm
 device on the last close.
+.It Dv FSS_UNLINK_ON_CREATE
+Unlink the backing file before the
+.Nm
+device is created.
 .El
 .It Dv FSSIOFGET(int)
 Gets the flags of a

Index: src/sys/dev/fss.c
diff -u src/sys/dev/fss.c:1.60.4.3 src/sys/dev/fss.c:1.60.4.4
--- src/sys/dev/fss.c:1.60.4.3	Sun Mar 28 17:26:26 2010
+++ src/sys/dev/fss.c	Sat Jun 18 17:00:25 2011
@@ -1,4 +1,4 @@
-/*	$NetBSD: fss.c,v 1.60.4.3 2010/03/28 17:26:26 snj Exp $	*/
+/*	$NetBSD: fss.c,v 1.60.4.4 2011/06/18 17:00:25 bouyer Exp $	*/
 
 /*-
  * Copyright (c) 2003 The NetBSD Foundation, Inc.
@@ -36,7 +36,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: fss.c,v 1.60.4.3 2010/03/28 17:26:26 snj Exp $");
+__KERNEL_RCSID(0, "$NetBSD: fss.c,v 1.60.4.4 2011/06/18 17:00:25 bouyer Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -58,6 +58,7 @@
 #include <sys/kthread.h>
 #include <sys/fstrans.h>
 #include <sys/simplelock.h>
+#include <sys/vfs_syscalls.h>		/* For do_sys_unlink(). */
 
 #include <miscfs/specfs/specdev.h>
 
@@ -301,6 +302,9 @@
 	struct fss_get *fsg = (struct fss_get *)data;
 
 	switch (cmd) {
+	case FSSIOCSET50:
+		fss->fss_flags = 0;
+		/* Fall through */
 	case FSSIOCSET:
 		mutex_enter(&sc->sc_lock);
 		if ((flag & FWRITE) == 0)
@@ -309,6 +313,8 @@
 			error = EBUSY;
 		else
 			error = fss_create_snapshot(sc, fss, l);
+		if (error == 0)
+			sc->sc_uflags = fss->fss_flags;
 		mutex_exit(&sc->sc_lock);
 		break;
 
@@ -604,7 +610,7 @@
 	 * Check for file system internal snapshot.
 	 */
 
-	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, fss->fss_bstore);
+	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, fss->fss_bstore);
 	if ((error = namei(&nd)) != 0)
 		return error;
 
@@ -618,14 +624,20 @@
 		    sc->sc_bs_bshift++)
 			if (FSS_FSBSIZE(sc) == fsbsize)
 				break;
-		if (sc->sc_bs_bshift >= bits) {
-			VOP_UNLOCK(sc->sc_bs_vp, 0);
+		if (sc->sc_bs_bshift >= bits)
 			return EINVAL;
-		}
 
 		sc->sc_bs_bmask = FSS_FSBSIZE(sc)-1;
 		sc->sc_clshift = 0;
 
+		if ((fss->fss_flags & FSS_UNLINK_ON_CREATE) != 0) {
+			error = do_sys_unlink(fss->fss_bstore, UIO_USERSPACE);
+			if (error)
+				return error;
+		}
+		error = vn_lock(nd.ni_vp, LK_EXCLUSIVE);
+		if (error != 0)
+			return error;
 		error = VFS_SNAPSHOT(sc->sc_mount, sc->sc_bs_vp, &ts);
 		TIMESPEC_TO_TIMEVAL(&sc->sc_time, &ts);
 
@@ -633,7 +645,7 @@
 
 		return error;
 	}
-	vput(nd.ni_vp);
+	vrele(nd.ni_vp);
 
 	/*
 	 * Get the block device it is mounted on.
@@ -676,6 +688,11 @@
 	if (nd.ni_vp->v_type != VREG && nd.ni_vp->v_type != VCHR)
 		return EINVAL;
 
+	if ((fss->fss_flags & FSS_UNLINK_ON_CREATE) != 0) {
+		error = do_sys_unlink(fss->fss_bstore, UIO_USERSPACE);
+		if (error)
+			return error;
+	}
 	if (sc->sc_bs_vp->v_type == VREG) {
 		error = VOP_GETATTR(sc->sc_bs_vp, &va, l->l_cred);
 		if (error != 0)
@@ -803,7 +820,7 @@
 	fss_softc_free(sc);
 	if (sc->sc_bs_vp != NULL) {
 		if (sc->sc_flags & FSS_PERSISTENT)
-			vn_close(sc->sc_bs_vp, FREAD, l->l_cred);
+			vrele(sc->sc_bs_vp);
 		else
 			vn_close(sc->sc_bs_vp, FREAD|FWRITE, l->l_cred);
 	}
@@ -830,7 +847,7 @@
 
 	fss_softc_free(sc);
 	if (sc->sc_flags & FSS_PERSISTENT)
-		vn_close(sc->sc_bs_vp, FREAD, l->l_cred);
+		vrele(sc->sc_bs_vp);
 	else
 		vn_close(sc->sc_bs_vp, FREAD|FWRITE, l->l_cred);
 	sc->sc_bs_vp = NULL;

Index: src/sys/dev/fssvar.h
diff -u src/sys/dev/fssvar.h:1.23 src/sys/dev/fssvar.h:1.23.4.1
--- src/sys/dev/fssvar.h:1.23	Wed Sep 17 14:49:25 2008
+++ src/sys/dev/fssvar.h	Sat Jun 18 17:00:25 2011
@@ -1,4 +1,4 @@
-/*	$NetBSD: fssvar.h,v 1.23 2008/09/17 14:49:25 hannken Exp $	*/
+/*	$NetBSD: fssvar.h,v 1.23.4.1 2011/06/18 17:00:25 bouyer Exp $	*/
 
 /*-
  * Copyright (c) 2003, 2007 The NetBSD Foundation, Inc.
@@ -35,11 +35,13 @@
 #include <sys/simplelock.h>
 
 #define FSS_UNCONFIG_ON_CLOSE	0x01	/* Unconfigure on last close */
+#define FSS_UNLINK_ON_CREATE	0x02	/* Unlink backing store on create */
 
 struct fss_set {
 	char		*fss_mount;	/* Mount point of file system */
 	char		*fss_bstore;	/* Path of backing store */
 	blksize_t	fss_csize;	/* Preferred cluster size */
+	int		fss_flags;	/* Initial flags */
 };
 
 struct fss_get {
@@ -50,11 +52,12 @@
 	blkcnt_t	fsg_bs_size;	/* # clusters on backing store */
 };
 
-#define FSSIOCSET	_IOW('F', 0, struct fss_set)	/* Configure */
+#define FSSIOCSET	_IOW('F', 5, struct fss_set)	/* Configure */
 #define FSSIOCGET	_IOR('F', 1, struct fss_get)	/* Status */
 #define FSSIOCCLR	_IO('F', 2)			/* Unconfigure */
 #define FSSIOFSET	_IOW('F', 3, int)		/* Set flags */
 #define FSSIOFGET	_IOR('F', 4, int)		/* Get flags */
+#define FSSIOCSET50	_IOW('F', 0, struct fss_set)	/* Old configure */
 
 #ifdef _KERNEL
 

Index: src/sys/kern/vfs_wapbl.c
diff -u src/sys/kern/vfs_wapbl.c:1.3.8.5 src/sys/kern/vfs_wapbl.c:1.3.8.6
--- src/sys/kern/vfs_wapbl.c:1.3.8.5	Mon Mar  7 04:09:28 2011
+++ src/sys/kern/vfs_wapbl.c	Sat Jun 18 17:00:25 2011
@@ -1,4 +1,4 @@
-/*	$NetBSD: vfs_wapbl.c,v 1.3.8.5 2011/03/07 04:09:28 riz Exp $	*/
+/*	$NetBSD: vfs_wapbl.c,v 1.3.8.6 2011/06/18 17:00:25 bouyer Exp $	*/
 
 /*-
  * Copyright (c) 2003, 2008, 2009 The NetBSD Foundation, Inc.
@@ -33,7 +33,7 @@
  * This implements file system independent write ahead filesystem logging.
  */
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: vfs_wapbl.c,v 1.3.8.5 2011/03/07 04:09:28 riz Exp $");
+__KERNEL_RCSID(0, "$NetBSD: vfs_wapbl.c,v 1.3.8.6 2011/06/18 17:00:25 bouyer Exp $");
 
 #include <sys/param.h>
 
@@ -401,7 +401,7 @@
 	wl->wl_bufcount_max = (nbuf / 2) * 1024;
 
 	/* XXX tie this into resource estimation */
-	wl->wl_dealloclim = 2 * btodb(wl->wl_bufbytes_max);
+	wl->wl_dealloclim = wl->wl_bufbytes_max / mp->mnt_stat.f_bsize / 2;
 	
 #if WAPBL_UVM_ALLOC
 	wl->wl_deallocblks = (void *) uvm_km_zalloc(kernel_map,
@@ -830,8 +830,7 @@
 		  ((wl->wl_bufcount + (lockcount * 10)) >
 		   wl->wl_bufcount_max / 2) ||
 		  (wapbl_transaction_len(wl) > wl->wl_circ_size / 2) ||
-		  (wl->wl_dealloccnt >=
-		   (wl->wl_dealloclim - (wl->wl_dealloclim >> 8)));
+		  (wl->wl_dealloccnt >= (wl->wl_dealloclim / 2));
 	mutex_exit(&wl->wl_mtx);
 
 	if (doflush) {

Index: src/sys/ufs/ffs/ffs_balloc.c
diff -u src/sys/ufs/ffs/ffs_balloc.c:1.51 src/sys/ufs/ffs/ffs_balloc.c:1.51.4.1
--- src/sys/ufs/ffs/ffs_balloc.c:1.51	Thu Jul 31 05:38:06 2008
+++ src/sys/ufs/ffs/ffs_balloc.c	Sat Jun 18 17:00:25 2011
@@ -1,4 +1,4 @@
-/*	$NetBSD: ffs_balloc.c,v 1.51 2008/07/31 05:38:06 simonb Exp $	*/
+/*	$NetBSD: ffs_balloc.c,v 1.51.4.1 2011/06/18 17:00:25 bouyer Exp $	*/
 
 /*
  * Copyright (c) 2002 Networks Associates Technology, Inc.
@@ -41,7 +41,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ffs_balloc.c,v 1.51 2008/07/31 05:38:06 simonb Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ffs_balloc.c,v 1.51.4.1 2011/06/18 17:00:25 bouyer Exp $");
 
 #if defined(_KERNEL_OPT)
 #include "opt_quota.h"
@@ -340,6 +340,10 @@
 			goto fail;
 		}
 		mutex_enter(&ump->um_lock);
+		/* Try to keep snapshot indirect blocks contiguous. */
+		if (i == num && (ip->i_flags & SF_SNAPSHOT) != 0)
+			pref = ffs_blkpref_ufs1(ip, lbn, indirs[i-1].in_off,
+			    flags | B_METAONLY, &bap[0]);
 		if (pref == 0)
 			pref = ffs_blkpref_ufs1(ip, lbn, 0, flags | B_METAONLY,
 			    NULL);
@@ -932,6 +936,10 @@
 			goto fail;
 		}
 		mutex_enter(&ump->um_lock);
+		/* Try to keep snapshot indirect blocks contiguous. */
+		if (i == num && (ip->i_flags & SF_SNAPSHOT) != 0)
+			pref = ffs_blkpref_ufs2(ip, lbn, indirs[i-1].in_off,
+			    flags | B_METAONLY, &bap[0]);
 		if (pref == 0)
 			pref = ffs_blkpref_ufs2(ip, lbn, 0, flags | B_METAONLY,
 			    NULL);

Index: src/sys/ufs/ffs/ffs_snapshot.c
diff -u src/sys/ufs/ffs/ffs_snapshot.c:1.82.4.3 src/sys/ufs/ffs/ffs_snapshot.c:1.82.4.4
--- src/sys/ufs/ffs/ffs_snapshot.c:1.82.4.3	Sun Mar 28 17:28:33 2010
+++ src/sys/ufs/ffs/ffs_snapshot.c	Sat Jun 18 17:00:26 2011
@@ -1,4 +1,4 @@
-/*	$NetBSD: ffs_snapshot.c,v 1.82.4.3 2010/03/28 17:28:33 snj Exp $	*/
+/*	$NetBSD: ffs_snapshot.c,v 1.82.4.4 2011/06/18 17:00:26 bouyer Exp $	*/
 
 /*
  * Copyright 2000 Marshall Kirk McKusick. All Rights Reserved.
@@ -38,7 +38,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ffs_snapshot.c,v 1.82.4.3 2010/03/28 17:28:33 snj Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ffs_snapshot.c,v 1.82.4.4 2011/06/18 17:00:26 bouyer Exp $");
 
 #if defined(_KERNEL_OPT)
 #include "opt_ffs.h"
@@ -79,6 +79,7 @@
 struct snap_info {
 	kmutex_t si_lock;			/* Lock this snapinfo */
 	kmutex_t si_snaplock;			/* Snapshot vnode common lock */
+	lwp_t *si_owner;			/* Snaplock owner */
 	TAILQ_HEAD(inodelst, inode) si_snapshots; /* List of active snapshots */
 	daddr_t *si_snapblklist;		/* Snapshot block hints list */
 	uint32_t si_gen;			/* Incremented on change */
@@ -114,6 +115,9 @@
 static int rwfsblk(struct vnode *, int, void *, daddr_t);
 static int syncsnap(struct vnode *);
 static int wrsnapblk(struct vnode *, void *, daddr_t);
+#if !defined(FFS_NO_SNAPSHOT)
+static int blocks_in_journal(struct fs *);
+#endif
 
 static inline bool is_active_snapshot(struct snap_info *, struct inode *);
 static inline daddr_t db_get(struct inode *, int);
@@ -139,6 +143,7 @@
 	TAILQ_INIT(&si->si_snapshots);
 	mutex_init(&si->si_lock, MUTEX_DEFAULT, IPL_NONE);
 	mutex_init(&si->si_snaplock, MUTEX_DEFAULT, IPL_NONE);
+	si->si_owner = NULL;
 	si->si_gen = 0;
 	si->si_snapblklist = NULL;
 
@@ -172,7 +177,6 @@
 }
 #else /* defined(FFS_NO_SNAPSHOT) */
 	bool suspended = false;
-	bool snapshot_locked = false;
 	int error, redo = 0, snaploc;
 	void *sbbuf = NULL;
 	daddr_t *snaplist = NULL, snaplistsize = 0;
@@ -213,12 +217,6 @@
 	if (error)
 		goto out;
 	/*
-	 * Change inode to snapshot type file.
-	 */
-	ip->i_flags |= SF_SNAPSHOT;
-	DIP_ASSIGN(ip, flags, ip->i_flags);
-	ip->i_flag |= IN_CHANGE | IN_UPDATE;
-	/*
 	 * Copy all the cylinder group maps. Although the
 	 * filesystem is still active, we hope that only a few
 	 * cylinder groups will change between now and when we
@@ -267,15 +265,11 @@
 	if (error)
 		goto out;
 	/*
-	 * Acquire the snapshot lock.
-	 */
-	mutex_enter(&si->si_snaplock);
-	snapshot_locked = true;
-	/*
 	 * Record snapshot inode. Since this is the newest snapshot,
 	 * it must be placed at the end of the list.
 	 */
-	fs->fs_snapinum[snaploc] = ip->i_number;
+	if (ip->i_ffs_effnlink > 0)
+		fs->fs_snapinum[snaploc] = ip->i_number;
 
 	mutex_enter(&si->si_lock);
 	if (is_active_snapshot(si, ip))
@@ -374,15 +368,15 @@
 	si->si_gen++;
 	mutex_exit(&si->si_lock);
 
-	if (snapshot_locked)
-		mutex_exit(&si->si_snaplock);
 	if (suspended) {
+		VOP_UNLOCK(vp, 0);
 		vfs_resume(vp->v_mount);
+		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 #ifdef DEBUG
 		getmicrotime(&endtime);
 		timersub(&endtime, &starttime, &endtime);
-		printf("%s: suspended %ld.%03ld sec, redo %d of %d\n",
-		    mp->mnt_stat.f_mntonname, (long)endtime.tv_sec,
+		printf("%s: suspended %lld.%03ld sec, redo %d of %d\n",
+		    mp->mnt_stat.f_mntonname, (long long)endtime.tv_sec,
 		    endtime.tv_usec / 1000, redo, fs->fs_ncg);
 #endif
 	}
@@ -391,7 +385,7 @@
 			(void) ffs_truncate(vp, (off_t)0, 0, NOCRED);
 			UFS_WAPBL_END(mp);
 		}
-	} else
+	} else if (ip->i_ffs_effnlink > 0)
 		vref(vp);
 	return (error);
 }
@@ -402,11 +396,13 @@
 static int
 snapshot_setup(struct mount *mp, struct vnode *vp)
 {
-	int error, i, len, loc;
+	int error, n, len, loc, cg;
 	daddr_t blkno, numblks;
 	struct buf *ibp, *nbp;
 	struct fs *fs = VFSTOUFS(mp)->um_fs;
 	struct lwp *l = curlwp;
+	const int wbreak = blocks_in_journal(fs)/8;
+	struct inode *ip = VTOI(vp);
 
 	/*
 	 * Check mount, exclusive reference and owner.
@@ -425,6 +421,11 @@
 		if (error)
 			return error;
 	}
+
+	/* Change inode to snapshot type file. */
+	ip->i_flags |= SF_SNAPSHOT;
+	DIP_ASSIGN(ip, flags, ip->i_flags);
+	ip->i_flag |= IN_CHANGE | IN_UPDATE;
 	/*
 	 * Write an empty list of preallocated blocks to the end of
 	 * the snapshot to set size to at least that of the filesystem.
@@ -451,7 +452,7 @@
 	error = UFS_WAPBL_BEGIN(mp);
 	if (error)
 		return error;
-	for (blkno = NDADDR, i = 0; blkno < numblks; blkno += NINDIR(fs)) {
+	for (blkno = NDADDR, n = 0; blkno < numblks; blkno += NINDIR(fs)) {
 		error = ffs_balloc(vp, lblktosize(fs, (off_t)blkno),
 		    fs->fs_bsize, l->l_cred, B_METAONLY, &ibp);
 		if (error)
@@ -460,7 +461,7 @@
 			bawrite(ibp);
 		else
 			brelse(ibp, 0);
-		if ((++i % 16) == 0) {
+		if (wbreak > 0 && (++n % wbreak) == 0) {
 			UFS_WAPBL_END(mp);
 			error = UFS_WAPBL_BEGIN(mp);
 			if (error)
@@ -483,6 +484,28 @@
 		if (error)
 			goto out;
 		bawrite(nbp);
+		if (wbreak > 0 && (++n % wbreak) == 0) {
+			UFS_WAPBL_END(mp);
+			error = UFS_WAPBL_BEGIN(mp);
+			if (error)
+				return error;
+		}
+	}
+	/*
+	 * Allocate all cylinder group blocks.
+	 */
+	for (cg = 0; cg < fs->fs_ncg; cg++) {
+		error = ffs_balloc(vp, lfragtosize(fs, cgtod(fs, cg)),
+		    fs->fs_bsize, l->l_cred, 0, &nbp);
+		if (error)
+			goto out;
+		bawrite(nbp);
+		if (wbreak > 0 && (++n % wbreak) == 0) {
+			UFS_WAPBL_END(mp);
+			error = UFS_WAPBL_BEGIN(mp);
+			if (error)
+				return error;
+		}
 	}
 
 out:
@@ -563,8 +586,7 @@
 snapshot_expunge(struct mount *mp, struct vnode *vp, struct fs *copy_fs,
     daddr_t *snaplistsize, daddr_t **snaplist)
 {
-	bool has_wapbl = false;
-	int cg, error, len, loc;
+	int cg, error = 0, len, loc;
 	daddr_t blkno, *blkp;
 	struct fs *fs = VFSTOUFS(mp)->um_fs;
 	struct inode *xp;
@@ -595,10 +617,6 @@
 	 */
 	*snaplistsize = fs->fs_ncg + howmany(fs->fs_cssize, fs->fs_bsize) +
 	    FSMAXSNAP + 1 /* superblock */ + 1 /* last block */ + 1 /* size */;
-	error = UFS_WAPBL_BEGIN(mp);
-	if (error)
-		goto out;
-	has_wapbl = true;
 	mutex_enter(&mntvnode_lock);
 	/*
 	 * NOTE: not using the TAILQ_FOREACH here since in this loop vgone()
@@ -650,19 +668,30 @@
 		if (loc < NDADDR) {
 			len = fragroundup(fs, blkoff(fs, xp->i_size));
 			if (len > 0 && len < fs->fs_bsize) {
+				error = UFS_WAPBL_BEGIN(mp);
+				if (error) {
+					(void)vunmark(mvp);
+					goto out;
+				}
 				ffs_blkfree(copy_fs, vp, db_get(xp, loc),
 				    len, xp->i_number);
 				blkno = db_get(xp, loc);
 				db_assign(xp, loc, 0);
+				UFS_WAPBL_END(mp);
 			}
 		}
 		*snaplistsize += 1;
 		error = expunge(vp, xp, copy_fs, fullacct, BLK_NOCOPY);
 		if (blkno)
 			db_assign(xp, loc, blkno);
-		if (!error)
-			error = ffs_freefile(copy_fs, vp, xp->i_number,
-			    xp->i_mode);
+		if (!error) {
+			error = UFS_WAPBL_BEGIN(mp);
+			if (!error) {
+				error = ffs_freefile(copy_fs, vp,
+				    xp->i_number, xp->i_mode);
+				UFS_WAPBL_END(mp);
+			}
+		}
 		if (error) {
 			(void)vunmark(mvp);
 			goto out;
@@ -687,10 +716,9 @@
 		*blkp++ = blkno + loc;
 	for (; cg < fs->fs_ncg; cg++)
 		*blkp++ = fragstoblks(fs, cgtod(fs, cg));
+	(*snaplist)[0] = blkp - &(*snaplist)[0];
 
 out:
-	if (has_wapbl)
-		UFS_WAPBL_END(mp);
 	if (mvp != NULL)
 		vnfree(mvp);
 	if (logvp != NULL)
@@ -712,25 +740,26 @@
 snapshot_expunge_snap(struct mount *mp, struct vnode *vp,
     struct fs *copy_fs, daddr_t snaplistsize)
 {
-	int error, i;
+	int error = 0, i;
 	daddr_t numblks, *snaplist = NULL;
 	struct fs *fs = VFSTOUFS(mp)->um_fs;
 	struct inode *ip = VTOI(vp), *xp;
 	struct lwp *l = curlwp;
 	struct snap_info *si = VFSTOUFS(mp)->um_snapinfo;
 
-	error = UFS_WAPBL_BEGIN(mp);
-	if (error)
-		return error;
 	TAILQ_FOREACH(xp, &si->si_snapshots, i_nextsnap) {
-		if (xp == ip)
-			break;
-		error = expunge(vp, xp, fs, snapacct, BLK_SNAP);
-		if (error)
-			break;
+		if (xp != ip) {
+			error = expunge(vp, xp, fs, snapacct, BLK_SNAP);
+			if (error)
+				break;
+		}
 		if (xp->i_ffs_effnlink != 0)
 			continue;
+		error = UFS_WAPBL_BEGIN(mp);
+		if (error)
+			break;
 		error = ffs_freefile(copy_fs, vp, xp->i_number, xp->i_mode);
+		UFS_WAPBL_END(mp);
 		if (error)
 			break;
 	}
@@ -762,12 +791,10 @@
 		snaplist[i] = ufs_rw64(snaplist[i], UFS_FSNEEDSWAP(fs));
 	error = vn_rdwr(UIO_WRITE, vp, (void *)snaplist,
 	    snaplistsize * sizeof(daddr_t), lblktosize(fs, (off_t)numblks),
-	    UIO_SYSSPACE, IO_NODELOCKED | IO_JOURNALLOCKED | IO_UNIT,
-	    l->l_cred, NULL, NULL);
+	    UIO_SYSSPACE, IO_NODELOCKED | IO_UNIT, l->l_cred, NULL, NULL);
 	for (i = 0; i < snaplistsize; i++)
 		snaplist[i] = ufs_rw64(snaplist[i], UFS_FSNEEDSWAP(fs));
 out:
-	UFS_WAPBL_END(mp);
 	if (error && snaplist != NULL) {
 		free(snaplist, M_UFSMNT);
 		ip->i_snapblklist = NULL;
@@ -860,13 +887,10 @@
 static int
 cgaccount(struct vnode *vp, int passno, int *redo)
 {
-	int cg, error;
+	int cg, error = 0;
 	struct buf *nbp;
 	struct fs *fs = VTOI(vp)->i_fs;
 
-	error = UFS_WAPBL_BEGIN(vp->v_mount);
-	if (error)
-		return error;
 	if (redo != NULL)
 		*redo = 0;
 	if (passno == 1)
@@ -875,18 +899,24 @@
 	for (cg = 0; cg < fs->fs_ncg; cg++) {
 		if (passno == 2 && ACTIVECG_ISSET(fs, cg))
 			continue;
+
 		if (redo != NULL)
 			*redo += 1;
+		error = UFS_WAPBL_BEGIN(vp->v_mount);
+		if (error)
+			return error;
 		error = ffs_balloc(vp, lfragtosize(fs, cgtod(fs, cg)),
 		    fs->fs_bsize, curlwp->l_cred, 0, &nbp);
-		if (error)
+		if (error) {
+			UFS_WAPBL_END(vp->v_mount);
 			break;
+		}
 		error = cgaccount1(cg, vp, nbp->b_data, passno);
 		bawrite(nbp);
+		UFS_WAPBL_END(vp->v_mount);
 		if (error)
 			break;
 	}
-	UFS_WAPBL_END(vp->v_mount);
 	return error;
 }
 
@@ -993,8 +1023,14 @@
 	struct lwp *l = curlwp;
 	void *bap;
 	struct buf *bp;
+	struct mount *mp;
 
 	ns = UFS_FSNEEDSWAP(fs);
+	mp = snapvp->v_mount;
+
+	error = UFS_WAPBL_BEGIN(mp);
+	if (error)
+		return error;
 	/*
 	 * Prepare to expunge the inode. If its inode block has not
 	 * yet been copied, then allocate and fill the copy.
@@ -1012,8 +1048,10 @@
 		if (! error)
 			error = rwfsblk(snapvp, B_READ, bp->b_data, lbn);
 	}
-	if (error)
+	if (error) {
+		UFS_WAPBL_END(mp);
 		return error;
+	}
 	/*
 	 * Set a snapshot inode to be a zero length file, regular files
 	 * or unlinked snapshots to be completely unallocated.
@@ -1040,6 +1078,7 @@
 		bzero(&dip2->di_db[0], (NDADDR + NIADDR) * sizeof(int64_t));
 	}
 	bdwrite(bp);
+	UFS_WAPBL_END(mp);
 	/*
 	 * Now go through and expunge all the blocks in the file
 	 * using the function requested.
@@ -1049,13 +1088,15 @@
 		bap = &cancelip->i_ffs1_db[0];
 	else
 		bap = &cancelip->i_ffs2_db[0];
-	if ((error = (*acctfunc)(snapvp, bap, 0, NDADDR, fs, 0, expungetype)))
+	error = (*acctfunc)(snapvp, bap, 0, NDADDR, fs, 0, expungetype);
+	if (error)
 		return (error);
 	if (fs->fs_magic == FS_UFS1_MAGIC)
 		bap = &cancelip->i_ffs1_ib[0];
 	else
 		bap = &cancelip->i_ffs2_ib[0];
-	if ((error = (*acctfunc)(snapvp, bap, 0, NIADDR, fs, -1, expungetype)))
+	error = (*acctfunc)(snapvp, bap, 0, NIADDR, fs, -1, expungetype);
+	if (error)
 		return (error);
 	blksperindir = 1;
 	lbn = -NDADDR;
@@ -1171,12 +1212,17 @@
 {
 	struct inode *ip = VTOI(vp);
 	struct lwp *l = curlwp;
+	struct mount *mp = vp->v_mount;
 	daddr_t blkno;
 	daddr_t lbn;
 	struct buf *ibp;
-	int error;
+	int error, n;
+	const int wbreak = blocks_in_journal(VFSTOUFS(mp)->um_fs)/8;
 
-	for ( ; oldblkp < lastblkp; oldblkp++) {
+	error = UFS_WAPBL_BEGIN(mp);
+	if (error)
+		return error;
+	for ( n = 0; oldblkp < lastblkp; oldblkp++) {
 		blkno = idb_get(ip, bap, oldblkp);
 		if (blkno == 0 || blkno == BLK_NOCOPY || blkno == BLK_SNAP)
 			continue;
@@ -1188,7 +1234,7 @@
 			error = ffs_balloc(vp, lblktosize(fs, (off_t)lbn),
 			    fs->fs_bsize, l->l_cred, B_METAONLY, &ibp);
 			if (error)
-				return (error);
+				break;
 			blkno = idb_get(ip, ibp->b_data,
 			    (lbn - NDADDR) % NINDIR(fs));
 		}
@@ -1212,8 +1258,15 @@
 				bdwrite(ibp);
 			}
 		}
+		if (wbreak > 0 && (++n % wbreak) == 0) {
+			UFS_WAPBL_END(mp);
+			error = UFS_WAPBL_BEGIN(mp);
+			if (error)
+				return error;
+		}
 	}
-	return (0);
+	UFS_WAPBL_END(mp);
+	return error;
 }
 
 /*
@@ -1225,16 +1278,21 @@
 {
 	daddr_t blkno;
 	struct inode *ip;
+	struct mount *mp = vp->v_mount;
 	ino_t inum;
-	int acctit;
+	int acctit, error, n;
+	const int wbreak = blocks_in_journal(VFSTOUFS(mp)->um_fs)/8;
 
+	error = UFS_WAPBL_BEGIN(mp);
+	if (error)
+		return error;
 	ip = VTOI(vp);
 	inum = ip->i_number;
 	if (lblkno == -1)
 		acctit = 0;
 	else
 		acctit = 1;
-	for ( ; oldblkp < lastblkp; oldblkp++, lblkno++) {
+	for ( n = 0; oldblkp < lastblkp; oldblkp++, lblkno++) {
 		blkno = idb_get(ip, bap, oldblkp);
 		if (blkno == 0 || blkno == BLK_NOCOPY)
 			continue;
@@ -1243,9 +1301,43 @@
 		if (blkno == BLK_SNAP)
 			blkno = blkstofrags(fs, lblkno);
 		ffs_blkfree(fs, vp, blkno, fs->fs_bsize, inum);
+		if (wbreak > 0 && (++n % wbreak) == 0) {
+			UFS_WAPBL_END(mp);
+			error = UFS_WAPBL_BEGIN(mp);
+			if (error)
+				return error;
+		}
 	}
+	UFS_WAPBL_END(mp);
 	return (0);
 }
+
+/*
+ * Number of blocks that fit into the journal or zero if not logging.
+ */
+static int
+blocks_in_journal(struct fs *fs)
+{
+	off_t bpj;
+
+	if ((fs->fs_flags & FS_DOWAPBL) == 0)
+		return 0;
+	bpj = 1;
+	if (fs->fs_journal_version == UFS_WAPBL_VERSION) {
+		switch (fs->fs_journal_location) {
+		case UFS_WAPBL_JOURNALLOC_END_PARTITION:
+			bpj = (off_t)fs->fs_journallocs[UFS_WAPBL_EPART_BLKSZ]*
+			    fs->fs_journallocs[UFS_WAPBL_EPART_COUNT];
+			break;
+		case UFS_WAPBL_JOURNALLOC_IN_FILESYSTEM:
+			bpj = (off_t)fs->fs_journallocs[UFS_WAPBL_INFS_BLKSZ]*
+			    fs->fs_journallocs[UFS_WAPBL_INFS_COUNT];
+			break;
+		}
+	}
+	bpj /= fs->fs_bsize;
+	return (bpj > 0 ? bpj : 1);
+}
 #endif /* defined(FFS_NO_SNAPSHOT) */
 
 /*
@@ -1322,6 +1414,7 @@
 	 *
 	 * Clear copy-on-write flag if last snapshot.
 	 */
+	mutex_enter(&si->si_snaplock);
 	mutex_enter(&si->si_lock);
 	if (is_active_snapshot(si, ip)) {
 		TAILQ_REMOVE(&si->si_snapshots, ip, i_nextsnap);
@@ -1331,18 +1424,22 @@
 			si->si_snapblklist = xp->i_snapblklist;
 			si->si_gen++;
 			mutex_exit(&si->si_lock);
+			mutex_exit(&si->si_snaplock);
 		} else {
 			si->si_snapblklist = 0;
 			si->si_gen++;
 			mutex_exit(&si->si_lock);
+			mutex_exit(&si->si_snaplock);
 			fscow_disestablish(mp, ffs_copyonwrite, devvp);
 		}
 		if (ip->i_snapblklist != NULL) {
 			free(ip->i_snapblklist, M_UFSMNT);
 			ip->i_snapblklist = NULL;
 		}
-	} else
+	} else {
 		mutex_exit(&si->si_lock);
+		mutex_exit(&si->si_snaplock);
+	}
 	/*
 	 * Clear all BLK_NOCOPY fields. Pass any block claims to other
 	 * snapshots that want them (see ffs_snapblkfree below).
@@ -1380,6 +1477,9 @@
 			}
 		}
 		bawrite(ibp);
+		UFS_WAPBL_END(mp);
+		error = UFS_WAPBL_BEGIN(mp);
+		KASSERT(error == 0);
 	}
 	/*
 	 * Clear snapshot flag and drop reference.
@@ -1420,25 +1520,18 @@
 	daddr_t lbn;
 	daddr_t blkno;
 	uint32_t gen;
-	int indiroff = 0, snapshot_locked = 0, error = 0, claimedblk = 0;
+	int indiroff = 0, error = 0, claimedblk = 0;
 
 	si = VFSTOUFS(mp)->um_snapinfo;
 	lbn = fragstoblks(fs, bno);
+	mutex_enter(&si->si_snaplock);
 	mutex_enter(&si->si_lock);
+	si->si_owner = curlwp;
+		
 retry:
 	gen = si->si_gen;
 	TAILQ_FOREACH(ip, &si->si_snapshots, i_nextsnap) {
 		vp = ITOV(ip);
-		if (snapshot_locked == 0) {
-			if (!mutex_tryenter(&si->si_snaplock)) {
-				mutex_exit(&si->si_lock);
-				mutex_enter(&si->si_snaplock);
-				mutex_enter(&si->si_lock);
-			}
-			snapshot_locked = 1;
-			if (gen != si->si_gen)
-				goto retry;
-		}
 		/*
 		 * Lookup block being written.
 		 */
@@ -1535,6 +1628,9 @@
 				error = syncsnap(vp);
 			else
 				error = 0;
+			mutex_enter(&si->si_lock);
+			si->si_owner = NULL;
+			mutex_exit(&si->si_lock);
 			mutex_exit(&si->si_snaplock);
 			return (error == 0);
 		}
@@ -1574,7 +1670,9 @@
 		if (gen != si->si_gen)
 			goto retry;
 	}
+	si->si_owner = NULL;
 	mutex_exit(&si->si_lock);
+	mutex_exit(&si->si_snaplock);
 	if (saved_data)
 		free(saved_data, M_UFSMNT);
 	/*
@@ -1583,8 +1681,6 @@
 	 * not be freed. Although space will be lost, the snapshot
 	 * will stay consistent.
 	 */
-	if (snapshot_locked)
-		mutex_exit(&si->si_snaplock);
 	return (error);
 }
 
@@ -1723,12 +1819,11 @@
 	mutex_enter(&si->si_lock);
 	while ((xp = TAILQ_FIRST(&si->si_snapshots)) != 0) {
 		vp = ITOV(xp);
-		vp->v_vnlock = &vp->v_lock;
 		TAILQ_REMOVE(&si->si_snapshots, xp, i_nextsnap);
 		if (xp->i_snapblklist == si->si_snapblklist)
 			si->si_snapblklist = NULL;
 		FREE(xp->i_snapblklist, M_UFSMNT);
-		if (xp->i_ffs_effnlink > 0) {
+		if (xp->i_ffs_effnlink != 0) {
 			si->si_gen++;
 			mutex_exit(&si->si_lock);
 			vrele(vp);
@@ -1798,6 +1893,15 @@
 	/*
 	 * Not in the precomputed list, so check the snapshots.
 	 */
+	 if (si->si_owner != curlwp) {
+		if (!mutex_tryenter(&si->si_snaplock)) {
+			mutex_exit(&si->si_lock);
+			mutex_enter(&si->si_snaplock);
+			mutex_enter(&si->si_lock);
+		}
+		si->si_owner = curlwp;
+		snapshot_locked = 1;
+	 }
 	 if (data_valid && bp->b_bcount == fs->fs_bsize)
 		saved_data = bp->b_data;
 retry:
@@ -1819,6 +1923,7 @@
 			blkno = db_get(ip, lbn);
 		} else {
 			mutex_exit(&si->si_lock);
+			blkno = 0; /* XXX: GCC */
 			if ((error = snapblkaddr(vp, lbn, &blkno)) != 0) {
 				mutex_enter(&si->si_lock);
 				break;
@@ -1838,34 +1943,8 @@
 			error = ENOMEM;
 			break;
 		}
-
-		if (snapshot_locked == 0) {
-			if (!mutex_tryenter(&si->si_snaplock)) {
-				mutex_exit(&si->si_lock);
-				mutex_enter(&si->si_snaplock);
-				mutex_enter(&si->si_lock);
-			}
-			snapshot_locked = 1;
-			if (gen != si->si_gen)
-				goto retry;
-
-			/* Check again if block still needs to be copied */
-			if (lbn < NDADDR) {
-				blkno = db_get(ip, lbn);
-			} else {
-				mutex_exit(&si->si_lock);
-				if ((error = snapblkaddr(vp, lbn, &blkno)) != 0) {
-					mutex_enter(&si->si_lock);
-					break;
-				}
-				mutex_enter(&si->si_lock);
-				if (gen != si->si_gen)
-					goto retry;
-			}
-
-			if (blkno != 0)
-				continue;
-		}
+		/* Only one level of recursion allowed. */
+		KASSERT(snapshot_locked);
 		/*
 		 * Allocate the block into which to do the copy. Since
 		 * multiple processes may all try to copy the same block,
@@ -1920,11 +1999,14 @@
 	 * have not been unlinked, and hence will be visible after
 	 * a crash, to ensure their integrity.
 	 */
-	mutex_exit(&si->si_lock);
+	if (snapshot_locked) {
+		si->si_owner = NULL;
+		mutex_exit(&si->si_lock);
+		mutex_exit(&si->si_snaplock);
+	} else
+		mutex_exit(&si->si_lock);
 	if (saved_data && saved_data != bp->b_data)
 		free(saved_data, M_UFSMNT);
-	if (snapshot_locked)
-		mutex_exit(&si->si_snaplock);
 	return error;
 }
 
@@ -2084,9 +2166,14 @@
 
 	mutex_enter(&bufcache_lock);
 	while ((bp = LIST_FIRST(&vp->v_dirtyblkhd))) {
-		KASSERT((bp->b_cflags & BC_BUSY) == 0);
+		error = bbusy(bp, false, 0, NULL);
+		if (error == EPASSTHROUGH)
+			continue;
+		else if (error != 0) {
+			mutex_exit(&bufcache_lock);
+			return error;
+		}
 		KASSERT(bp->b_bcount == fs->fs_bsize);
-		bp->b_cflags |= BC_BUSY;
 		mutex_exit(&bufcache_lock);
 		error = rwfsblk(vp, B_WRITE, bp->b_data,
 		    fragstoblks(fs, dbtofsb(fs, bp->b_blkno)));

Index: src/usr.sbin/fssconfig/fssconfig.c
diff -u src/usr.sbin/fssconfig/fssconfig.c:1.6 src/usr.sbin/fssconfig/fssconfig.c:1.6.6.1
--- src/usr.sbin/fssconfig/fssconfig.c:1.6	Mon Apr 28 20:24:16 2008
+++ src/usr.sbin/fssconfig/fssconfig.c	Sat Jun 18 17:00:26 2011
@@ -1,4 +1,4 @@
-/*	$NetBSD: fssconfig.c,v 1.6 2008/04/28 20:24:16 martin Exp $	*/
+/*	$NetBSD: fssconfig.c,v 1.6.6.1 2011/06/18 17:00:26 bouyer Exp $	*/
 
 /*-
  * Copyright (c) 2003 The NetBSD Foundation, Inc.
@@ -168,15 +168,16 @@
 		err(1, "open: %s", argv[0]);
 	}
 
+	if ((xflag || istmp) && isreg)
+		fss.fss_flags |= FSS_UNLINK_ON_CREATE;
+	else
+		fss.fss_flags = 0;
 	if (ioctl(fd, FSSIOCSET, &fss) < 0) {
 		if (istmp)
 			unlink(fss.fss_bstore);
 		err(1, "%s: FSSIOCSET", full);
 	}
 
-	if ((xflag || istmp) && isreg && unlink(fss.fss_bstore) < 0)
-		err(1, "unlink: %s", fss.fss_bstore);
-
 	if (vflag)
 		list(1, argv);
 }

Reply via email to