Module Name:    src
Committed By:   perseant
Date:           Thu Feb 16 02:47:56 UTC 2012

Modified Files:
        src/sbin/newfs_lfs: make_lfs.c
        src/sys/ufs/lfs: lfs.h lfs_alloc.c lfs_bio.c lfs_segment.c lfs_vfsops.c
            lfs_vnops.c
        src/tests/fs/vfs: t_renamerace.c t_rmdirrace.c

Log Message:
Pass t_renamerace and t_rmdirrace tests.

Adapt dholland@'s fix to ufs_rename to fix PR kern/43582.  Address several
other MP locking issues discovered during the course of investigating the
same problem.

Removed extraneous vn_lock() calls on the Ifile, since the Ifile writes
are controlled by the segment lock.

Fix PR kern/45982 by deemphasizing the estimate of how much metadata
will fill the empty space on disk when the disk is nearly empty
(t_renamerace creates a lot of inode blocks on a tiny empty disk).


To generate a diff of this commit:
cvs rdiff -u -r1.18 -r1.19 src/sbin/newfs_lfs/make_lfs.c
cvs rdiff -u -r1.135 -r1.136 src/sys/ufs/lfs/lfs.h
cvs rdiff -u -r1.111 -r1.112 src/sys/ufs/lfs/lfs_alloc.c
cvs rdiff -u -r1.121 -r1.122 src/sys/ufs/lfs/lfs_bio.c
cvs rdiff -u -r1.223 -r1.224 src/sys/ufs/lfs/lfs_segment.c
cvs rdiff -u -r1.293 -r1.294 src/sys/ufs/lfs/lfs_vfsops.c
cvs rdiff -u -r1.239 -r1.240 src/sys/ufs/lfs/lfs_vnops.c
cvs rdiff -u -r1.24 -r1.25 src/tests/fs/vfs/t_renamerace.c
cvs rdiff -u -r1.8 -r1.9 src/tests/fs/vfs/t_rmdirrace.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sbin/newfs_lfs/make_lfs.c
diff -u src/sbin/newfs_lfs/make_lfs.c:1.18 src/sbin/newfs_lfs/make_lfs.c:1.19
--- src/sbin/newfs_lfs/make_lfs.c:1.18	Thu Feb  2 03:50:32 2012
+++ src/sbin/newfs_lfs/make_lfs.c	Thu Feb 16 02:47:54 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: make_lfs.c,v 1.18 2012/02/02 03:50:32 perseant Exp $	*/
+/*	$NetBSD: make_lfs.c,v 1.19 2012/02/16 02:47:54 perseant Exp $	*/
 
 /*-
  * Copyright (c) 2003 The NetBSD Foundation, Inc.
@@ -62,7 +62,7 @@
 #if 0
 static char sccsid[] = "@(#)lfs.c	8.5 (Berkeley) 5/24/95";
 #else
-__RCSID("$NetBSD: make_lfs.c,v 1.18 2012/02/02 03:50:32 perseant Exp $");
+__RCSID("$NetBSD: make_lfs.c,v 1.19 2012/02/16 02:47:54 perseant Exp $");
 #endif
 #endif /* not lint */
 
@@ -496,7 +496,7 @@ make_lfs(int devfd, uint secsize, struct
 	if (fs->lfs_resvseg < MIN_RESV_SEGS)
 		fs->lfs_resvseg = MIN_RESV_SEGS;
 
-	if(fs->lfs_nseg < (3 * CM_MAG_NUM * fs->lfs_minfreeseg) / CM_MAG_DEN + 1
+	if(fs->lfs_nseg < (4 * fs->lfs_minfreeseg)
 	   || fs->lfs_nseg < LFS_MIN_SBINTERVAL + 1)
 	{
 		if(seg_size == 0 && ssize > (bsize<<1)) {

Index: src/sys/ufs/lfs/lfs.h
diff -u src/sys/ufs/lfs/lfs.h:1.135 src/sys/ufs/lfs/lfs.h:1.136
--- src/sys/ufs/lfs/lfs.h:1.135	Mon Jan  2 22:10:44 2012
+++ src/sys/ufs/lfs/lfs.h	Thu Feb 16 02:47:55 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: lfs.h,v 1.135 2012/01/02 22:10:44 perseant Exp $	*/
+/*	$NetBSD: lfs.h,v 1.136 2012/02/16 02:47:55 perseant Exp $	*/
 
 /*-
  * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
@@ -1027,13 +1027,23 @@ struct lfs_inode_ext {
 /*
  * Estimate number of clean blocks not available for writing because
  * they will contain metadata or overhead.  This is calculated as
- * (dmeta / # dirty segments) * (# clean segments).
+ *
+ *		E = ((C * M / D) * D + (0) * (T - D)) / T
+ * or more simply
+ *		E = (C * M) / T
+ *
+ * where
+ * C is the clean space,
+ * D is the dirty space,
+ * M is the dirty metadata, and
+ * T = C + D is the total space on disk.
+ *
+ * This approximates the old formula of E = C * M / D when D is close to T,
+ * but avoids falsely reporting "disk full" when the sample size (D) is small.
  */
-#define CM_MAG_NUM 3
-#define CM_MAG_DEN 2
 #define LFS_EST_CMETA(F) (int32_t)((					\
-				    (CM_MAG_NUM * ((F)->lfs_dmeta * (int64_t)(F)->lfs_nclean)) / \
-				    (CM_MAG_DEN * ((F)->lfs_nseg - (F)->lfs_nclean))))
+	((F)->lfs_dmeta * (int64_t)(F)->lfs_nclean) / 			\
+	((F)->lfs_nseg)))
 
 /* Estimate total size of the disk not including metadata */
 #define LFS_EST_NONMETA(F) ((F)->lfs_dsize - (F)->lfs_dmeta - LFS_EST_CMETA(F))

Index: src/sys/ufs/lfs/lfs_alloc.c
diff -u src/sys/ufs/lfs/lfs_alloc.c:1.111 src/sys/ufs/lfs/lfs_alloc.c:1.112
--- src/sys/ufs/lfs/lfs_alloc.c:1.111	Sun Jun 12 03:36:01 2011
+++ src/sys/ufs/lfs/lfs_alloc.c	Thu Feb 16 02:47:55 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: lfs_alloc.c,v 1.111 2011/06/12 03:36:01 rmind Exp $	*/
+/*	$NetBSD: lfs_alloc.c,v 1.112 2012/02/16 02:47:55 perseant Exp $	*/
 
 /*-
  * Copyright (c) 1999, 2000, 2001, 2002, 2003, 2007 The NetBSD Foundation, Inc.
@@ -60,7 +60,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: lfs_alloc.c,v 1.111 2011/06/12 03:36:01 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: lfs_alloc.c,v 1.112 2012/02/16 02:47:55 perseant Exp $");
 
 #if defined(_KERNEL_OPT)
 #include "opt_quota.h"
@@ -207,7 +207,6 @@ lfs_valloc(struct vnode *pvp, int mode, 
 	ASSERT_NO_SEGLOCK(fs);
 
 	lfs_seglock(fs, SEGM_PROT);
-	vn_lock(fs->lfs_ivnode, LK_EXCLUSIVE);
 
 	/* Get the head of the freelist. */
 	LFS_GET_HEADFREE(fs, cip, cbp, &new_ino);
@@ -236,7 +235,6 @@ lfs_valloc(struct vnode *pvp, int mode, 
 	if (fs->lfs_freehd == LFS_UNUSED_INUM) {
 		if ((error = lfs_extend_ifile(fs, cred)) != 0) {
 			LFS_PUT_HEADFREE(fs, cip, cbp, new_ino);
-			VOP_UNLOCK(fs->lfs_ivnode);
 			lfs_segunlock(fs);
 			return error;
 		}
@@ -252,7 +250,6 @@ lfs_valloc(struct vnode *pvp, int mode, 
 	mutex_exit(&lfs_lock);
 	++fs->lfs_nfiles;
 
-	VOP_UNLOCK(fs->lfs_ivnode);
 	lfs_segunlock(fs);
 
 	return lfs_ialloc(fs, pvp, new_ino, new_gen, vpp);
@@ -440,7 +437,6 @@ lfs_vfree(struct vnode *vp, ino_t ino, i
 	mutex_exit(vp->v_interlock);
 
 	lfs_seglock(fs, SEGM_PROT);
-	vn_lock(fs->lfs_ivnode, LK_EXCLUSIVE);
 
 	lfs_unmark_vnode(vp);
 	mutex_enter(&lfs_lock);
@@ -575,7 +571,6 @@ lfs_vfree(struct vnode *vp, ino_t ino, i
 	mutex_exit(&lfs_lock);
 	--fs->lfs_nfiles;
 
-	VOP_UNLOCK(fs->lfs_ivnode);
 	lfs_segunlock(fs);
 
 	return (0);

Index: src/sys/ufs/lfs/lfs_bio.c
diff -u src/sys/ufs/lfs/lfs_bio.c:1.121 src/sys/ufs/lfs/lfs_bio.c:1.122
--- src/sys/ufs/lfs/lfs_bio.c:1.121	Mon Jan  2 22:10:44 2012
+++ src/sys/ufs/lfs/lfs_bio.c	Thu Feb 16 02:47:55 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: lfs_bio.c,v 1.121 2012/01/02 22:10:44 perseant Exp $	*/
+/*	$NetBSD: lfs_bio.c,v 1.122 2012/02/16 02:47:55 perseant Exp $	*/
 
 /*-
  * Copyright (c) 1999, 2000, 2001, 2002, 2003, 2008 The NetBSD Foundation, Inc.
@@ -60,7 +60,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: lfs_bio.c,v 1.121 2012/01/02 22:10:44 perseant Exp $");
+__KERNEL_RCSID(0, "$NetBSD: lfs_bio.c,v 1.122 2012/02/16 02:47:55 perseant Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -151,12 +151,15 @@ static int
 lfs_reservebuf(struct lfs *fs, struct vnode *vp,
     struct vnode *vp2, int n, int bytes)
 {
+	int cantwait;
+
 	ASSERT_MAYBE_SEGLOCK(fs);
 	KASSERT(locked_queue_rcount >= 0);
 	KASSERT(locked_queue_rbytes >= 0);
 
+	cantwait = (VTOI(vp)->i_flag & IN_ADIROP) || fs->lfs_unlockvp == vp;
 	mutex_enter(&lfs_lock);
-	while (n > 0 && !lfs_fits_buf(fs, n, bytes)) {
+	while (!cantwait && n > 0 && !lfs_fits_buf(fs, n, bytes)) {
 		int error;
 
 		lfs_flush(fs, 0, 0);
@@ -213,28 +216,15 @@ lfs_reserveavail(struct lfs *fs, struct 
 	CLEANERINFO *cip;
 	struct buf *bp;
 	int error, slept;
+	int cantwait;
 
 	ASSERT_MAYBE_SEGLOCK(fs);
 	slept = 0;
 	mutex_enter(&lfs_lock);
-	while (fsb > 0 && !lfs_fits(fs, fsb + fs->lfs_ravail + fs->lfs_favail)) {
+	cantwait = (VTOI(vp)->i_flag & IN_ADIROP) || fs->lfs_unlockvp == vp;
+	while (!cantwait && fsb > 0 &&
+	       !lfs_fits(fs, fsb + fs->lfs_ravail + fs->lfs_favail)) {
 		mutex_exit(&lfs_lock);
-#if 0
-		/*
-		 * XXX ideally, we should unlock vnodes here
-		 * because we might sleep very long time.
-		 */
-		VOP_UNLOCK(vp);
-		if (vp2 != NULL) {
-			VOP_UNLOCK(vp2);
-		}
-#else
-		/*
-		 * XXX since we'll sleep for cleaner with vnode lock holding,
-		 * deadlock will occur if cleaner tries to lock the vnode.
-		 * (eg. lfs_markv -> lfs_fastvget -> getnewvnode -> vclean)
-		 */
-#endif
 
 		if (!slept) {
 			DLOG((DLOG_AVAIL, "lfs_reserve: waiting for %ld (bfree = %d,"
@@ -256,10 +246,6 @@ lfs_reserveavail(struct lfs *fs, struct 
 
 		error = mtsleep(&fs->lfs_avail, PCATCH | PUSER, "lfs_reserve",
 				0, &lfs_lock);
-#if 0
-		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX use lockstatus */
-		vn_lock(vp2, LK_EXCLUSIVE | LK_RETRY); /* XXX use lockstatus */
-#endif
 		if (error) {
 			mutex_exit(&lfs_lock);
 			return error;
@@ -285,7 +271,6 @@ int
 lfs_reserve(struct lfs *fs, struct vnode *vp, struct vnode *vp2, int fsb)
 {
 	int error;
-	int cantwait;
 
 	ASSERT_MAYBE_SEGLOCK(fs);
 	if (vp2) {
@@ -300,30 +285,18 @@ lfs_reserve(struct lfs *fs, struct vnode
 
 	KASSERT(fsb < 0 || VOP_ISLOCKED(vp));
 	KASSERT(vp2 == NULL || fsb < 0 || VOP_ISLOCKED(vp2));
-	KASSERT(vp2 == NULL || !(VTOI(vp2)->i_flag & IN_ADIROP));
 	KASSERT(vp2 == NULL || vp2 != fs->lfs_unlockvp);
 
-	cantwait = (VTOI(vp)->i_flag & IN_ADIROP) || fs->lfs_unlockvp == vp;
 #ifdef DIAGNOSTIC
-	if (cantwait) {
-		if (fsb > 0)
-			lfs_rescountdirop++;
-		else if (fsb < 0)
-			lfs_rescountdirop--;
-		if (lfs_rescountdirop < 0)
-			panic("lfs_rescountdirop");
-	}
-	else {
-		if (fsb > 0)
-			lfs_rescount++;
-		else if (fsb < 0)
-			lfs_rescount--;
-		if (lfs_rescount < 0)
-			panic("lfs_rescount");
-	}
+	mutex_enter(&lfs_lock);
+	if (fsb > 0)
+		lfs_rescount++;
+	else if (fsb < 0)
+		lfs_rescount--;
+	if (lfs_rescount < 0)
+		panic("lfs_rescount");
+	mutex_exit(&lfs_lock);
 #endif
-	if (cantwait)
-		return 0;
 
 	/*
 	 * XXX

Index: src/sys/ufs/lfs/lfs_segment.c
diff -u src/sys/ufs/lfs/lfs_segment.c:1.223 src/sys/ufs/lfs/lfs_segment.c:1.224
--- src/sys/ufs/lfs/lfs_segment.c:1.223	Mon Jan  2 22:10:44 2012
+++ src/sys/ufs/lfs/lfs_segment.c	Thu Feb 16 02:47:55 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: lfs_segment.c,v 1.223 2012/01/02 22:10:44 perseant Exp $	*/
+/*	$NetBSD: lfs_segment.c,v 1.224 2012/02/16 02:47:55 perseant Exp $	*/
 
 /*-
  * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
@@ -60,7 +60,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: lfs_segment.c,v 1.223 2012/01/02 22:10:44 perseant Exp $");
+__KERNEL_RCSID(0, "$NetBSD: lfs_segment.c,v 1.224 2012/02/16 02:47:55 perseant Exp $");
 
 #ifdef DEBUG
 # define vndebug(vp, str) do {						\
@@ -734,7 +734,6 @@ lfs_segwrite(struct mount *mp, int flags
 	did_ckp = 0;
 	if (do_ckp || fs->lfs_doifile) {
 		vp = fs->lfs_ivnode;
-		vn_lock(vp, LK_EXCLUSIVE);
 		loopcount = 0;
 		do {
 #ifdef DEBUG
@@ -807,7 +806,6 @@ lfs_segwrite(struct mount *mp, int flags
 		}
 #endif
 		mutex_exit(vp->v_interlock);
-		VOP_UNLOCK(vp);
 	} else {
 		(void) lfs_writeseg(fs, sp);
 	}
@@ -2603,8 +2601,8 @@ lfs_cluster_aiodone(struct buf *bp)
 		 * XXX KS - Shouldn't we set *both* if both types
 		 * of blocks are present (traverse the dirty list?)
 		 */
-		mutex_enter(&lfs_lock);
 		mutex_enter(vp->v_interlock);
+		mutex_enter(&lfs_lock);
 		if (vp != devvp && vp->v_numoutput == 0 &&
 		    (fbp = LIST_FIRST(&vp->v_dirtyblkhd)) != NULL) {
 			ip = VTOI(vp);
@@ -2616,8 +2614,8 @@ lfs_cluster_aiodone(struct buf *bp)
 				LFS_SET_UINO(ip, IN_MODIFIED);
 		}
 		cv_broadcast(&vp->v_cv);
-		mutex_exit(vp->v_interlock);
 		mutex_exit(&lfs_lock);
+		mutex_exit(vp->v_interlock);
 	}
 
 	/* Fix up the cluster buffer, and release it */

Index: src/sys/ufs/lfs/lfs_vfsops.c
diff -u src/sys/ufs/lfs/lfs_vfsops.c:1.293 src/sys/ufs/lfs/lfs_vfsops.c:1.294
--- src/sys/ufs/lfs/lfs_vfsops.c:1.293	Wed Jan  4 02:48:58 2012
+++ src/sys/ufs/lfs/lfs_vfsops.c	Thu Feb 16 02:47:55 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: lfs_vfsops.c,v 1.293 2012/01/04 02:48:58 perseant Exp $	*/
+/*	$NetBSD: lfs_vfsops.c,v 1.294 2012/02/16 02:47:55 perseant Exp $	*/
 
 /*-
  * Copyright (c) 1999, 2000, 2001, 2002, 2003, 2007, 2007
@@ -61,7 +61,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: lfs_vfsops.c,v 1.293 2012/01/04 02:48:58 perseant Exp $");
+__KERNEL_RCSID(0, "$NetBSD: lfs_vfsops.c,v 1.294 2012/02/16 02:47:55 perseant Exp $");
 
 #if defined(_KERNEL_OPT)
 #include "opt_lfs.h"
@@ -2089,7 +2089,6 @@ lfs_resize_fs(struct lfs *fs, int newnse
 	 * (XXX this could be done better.)
 	 */
 	rw_enter(&fs->lfs_iflock, RW_WRITER);
-	vn_lock(ivp, LK_EXCLUSIVE | LK_RETRY);
 	for (i = 0; i < ilast; i++) {
 		bread(ivp, i, fs->lfs_bsize, NOCRED, 0, &bp);
 		brelse(bp, 0);
@@ -2205,7 +2204,6 @@ lfs_resize_fs(struct lfs *fs, int newnse
 	VOP_BWRITE(bp->b_vp, bp);
 
 	/* Let Ifile accesses proceed */
-	VOP_UNLOCK(ivp);
 	rw_exit(&fs->lfs_iflock);
 
     out:

Index: src/sys/ufs/lfs/lfs_vnops.c
diff -u src/sys/ufs/lfs/lfs_vnops.c:1.239 src/sys/ufs/lfs/lfs_vnops.c:1.240
--- src/sys/ufs/lfs/lfs_vnops.c:1.239	Mon Jan  2 22:10:45 2012
+++ src/sys/ufs/lfs/lfs_vnops.c	Thu Feb 16 02:47:55 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: lfs_vnops.c,v 1.239 2012/01/02 22:10:45 perseant Exp $	*/
+/*	$NetBSD: lfs_vnops.c,v 1.240 2012/02/16 02:47:55 perseant Exp $	*/
 
 /*-
  * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
@@ -60,7 +60,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: lfs_vnops.c,v 1.239 2012/01/02 22:10:45 perseant Exp $");
+__KERNEL_RCSID(0, "$NetBSD: lfs_vnops.c,v 1.240 2012/02/16 02:47:55 perseant Exp $");
 
 #ifdef _KERNEL_OPT
 #include "opt_compat_netbsd.h"
@@ -91,6 +91,7 @@ __KERNEL_RCSID(0, "$NetBSD: lfs_vnops.c,
 #include <ufs/ufs/inode.h>
 #include <ufs/ufs/dir.h>
 #include <ufs/ufs/ufsmount.h>
+#include <ufs/ufs/ufs_bswap.h>
 #include <ufs/ufs/ufs_extern.h>
 
 #include <uvm/uvm.h>
@@ -437,7 +438,6 @@ lfs_set_dirop(struct vnode *dvp, struct 
 	}
 
 	if (lfs_dirvcount > LFS_MAX_DIROP) {
-		mutex_exit(&lfs_lock);
 		DLOG((DLOG_DIROP, "lfs_set_dirop: sleeping with dirops=%d, "
 		      "dirvcount=%d\n", fs->lfs_dirops, lfs_dirvcount));
 		if ((error = mtsleep(&lfs_dirvcount,
@@ -554,9 +554,11 @@ lfs_mark_vnode(struct vnode *vp)
 	mutex_enter(&lfs_lock);
 	if (!(ip->i_flag & IN_ADIROP)) {
 		if (!(vp->v_uflag & VU_DIROP)) {
+			mutex_exit(&lfs_lock);
 			mutex_enter(vp->v_interlock);
 			if (lfs_vref(vp) != 0)
 				panic("lfs_mark_vnode: could not vref");
+			mutex_enter(&lfs_lock);
 			++lfs_dirvcount;
 			++fs->lfs_dirvcount;
 			TAILQ_INSERT_TAIL(&fs->lfs_dchainhd, ip, i_lfs_dchain);
@@ -575,13 +577,13 @@ lfs_unmark_vnode(struct vnode *vp)
 {
 	struct inode *ip = VTOI(vp);
 
+	mutex_enter(&lfs_lock);
 	if (ip && (ip->i_flag & IN_ADIROP)) {
 		KASSERT(vp->v_uflag & VU_DIROP);
-		mutex_enter(&lfs_lock);
 		--ip->i_lfs->lfs_nadirop;
-		mutex_exit(&lfs_lock);
 		ip->i_flag &= ~IN_ADIROP;
 	}
+	mutex_exit(&lfs_lock);
 }
 
 int
@@ -808,6 +810,188 @@ lfs_link(void *v)
 	return (error);
 }
 
+/* XXX following lifted from ufs_lookup.c */
+#define	FSFMT(vp)	(((vp)->v_mount->mnt_iflag & IMNT_DTYPE) == 0)
+
+/*
+ * Check if either entry referred to by FROM_ULR is within the range
+ * of entries named by TO_ULR.
+ */
+static int
+ulr_overlap(const struct ufs_lookup_results *from_ulr,
+	    const struct ufs_lookup_results *to_ulr)
+{
+	doff_t from_start, from_prevstart;
+	doff_t to_start, to_end;
+
+	/*
+	 * FROM is a DELETE result; offset points to the entry to
+	 * remove and subtracting count gives the previous entry.
+	 */
+	from_start = from_ulr->ulr_offset - from_ulr->ulr_count;
+	from_prevstart = from_ulr->ulr_offset;
+
+	/*
+	 * TO is a RENAME (thus non-DELETE) result; offset points
+	 * to the beginning of a region to write in, and adding
+	 * count gives the end of the region.
+	 */
+	to_start = to_ulr->ulr_offset;
+	to_end = to_ulr->ulr_offset + to_ulr->ulr_count;
+
+	if (from_prevstart >= to_start && from_prevstart < to_end) {
+		return 1;
+	}
+	if (from_start >= to_start && from_start < to_end) {
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * A virgin directory (no blushing please).
+ */
+static const struct dirtemplate mastertemplate = {
+	0,	12,		DT_DIR,	1,	".",
+	0,	DIRBLKSIZ - 12,	DT_DIR,	2,	".."
+};
+
+/*
+ * Wrapper for relookup that also updates the supplemental results.
+ */
+static int
+do_relookup(struct vnode *dvp, struct ufs_lookup_results *ulr,
+	    struct vnode **vp, struct componentname *cnp)
+{
+	int error;
+
+	error = relookup(dvp, vp, cnp, 0);
+	if (error) {
+		return error;
+	}
+	/* update the supplemental reasults */
+	*ulr = VTOI(dvp)->i_crap;
+	UFS_CHECK_CRAPCOUNTER(VTOI(dvp));
+	return 0;
+}
+
+/*
+ * Lock and relookup a sequence of two directories and two children.
+ *
+ */
+static int
+lock_vnode_sequence(struct vnode *d1, struct ufs_lookup_results *ulr1,
+		    struct vnode **v1_ret, struct componentname *cn1, 
+		    int v1_missing_ok,
+		    int overlap_error,
+		    struct vnode *d2, struct ufs_lookup_results *ulr2,
+		    struct vnode **v2_ret, struct componentname *cn2, 
+		    int v2_missing_ok)
+{
+	struct vnode *v1, *v2;
+	int error;
+
+	KASSERT(d1 != d2);
+
+	vn_lock(d1, LK_EXCLUSIVE | LK_RETRY);
+	if (VTOI(d1)->i_size == 0) {
+		/* d1 has been rmdir'd */
+		VOP_UNLOCK(d1);
+		return ENOENT;
+	}
+	error = do_relookup(d1, ulr1, &v1, cn1);
+	if (v1_missing_ok) {
+		if (error == ENOENT) {
+			/*
+			 * Note: currently if the name doesn't exist,
+			 * relookup succeeds (it intercepts the
+			 * EJUSTRETURN from VOP_LOOKUP) and sets tvp
+			 * to NULL. Therefore, we will never get
+			 * ENOENT and this branch is not needed.
+			 * However, in a saner future the EJUSTRETURN
+			 * garbage will go away, so let's DTRT.
+			 */
+			v1 = NULL;
+			error = 0;
+		}
+	} else {
+		if (error == 0 && v1 == NULL) {
+			/* This is what relookup sets if v1 disappeared. */
+			error = ENOENT;
+		}
+	}
+	if (error) {
+		VOP_UNLOCK(d1);
+		return error;
+	}
+	if (v1 && v1 == d2) {
+		VOP_UNLOCK(d1);
+		VOP_UNLOCK(v1);
+		vrele(v1);
+		return overlap_error;
+	}
+
+	/*
+	 * The right way to do this is to do lookups without locking
+	 * the results, and lock the results afterwards; then at the
+	 * end we can avoid trying to lock v2 if v2 == v1.
+	 *
+	 * However, for the reasons described in the fdvp == tdvp case
+	 * in rename below, we can't do that safely. So, in the case
+	 * where v1 is not a directory, unlock it and lock it again
+	 * afterwards. This is safe in locking order because a
+	 * non-directory can't be above anything else in the tree. If
+	 * v1 *is* a directory, that's not true, but then because d1
+	 * != d2, v1 != v2.
+	 */
+	if (v1 && v1->v_type != VDIR) {
+		VOP_UNLOCK(v1);
+	}
+	vn_lock(d2, LK_EXCLUSIVE | LK_RETRY);
+	if (VTOI(d2)->i_size == 0) {
+		/* d2 has been rmdir'd */
+		VOP_UNLOCK(d2);
+		if (v1 && v1->v_type == VDIR) {
+			VOP_UNLOCK(v1);
+		}
+		VOP_UNLOCK(d1);
+		if (v1) {
+			vrele(v1);
+		}
+		return ENOENT;
+	}
+	error = do_relookup(d2, ulr2, &v2, cn2);
+	if (v2_missing_ok) {
+		if (error == ENOENT) {
+			/* as above */
+			v2 = NULL;
+			error = 0;
+		}
+	} else {
+		if (error == 0 && v2 == NULL) {
+			/* This is what relookup sets if v2 disappeared. */
+			error = ENOENT;
+		}
+	}
+	if (error) {
+		VOP_UNLOCK(d2);
+		if (v1 && v1->v_type == VDIR) {
+			VOP_UNLOCK(v1);
+		}
+		VOP_UNLOCK(d1);
+		if (v1) {
+			vrele(v1);
+		}
+		return error;
+	}
+	if (v1 && v1->v_type != VDIR && v1 != v2) {
+		vn_lock(v1, LK_EXCLUSIVE | LK_RETRY);
+	}
+	*v1_ret = v1;
+	*v2_ret = v2;
+	return 0;
+}
+
 int
 lfs_rename(void *v)
 {
@@ -819,64 +1003,239 @@ lfs_rename(void *v)
 		struct vnode *a_tvp;
 		struct componentname *a_tcnp;
 	} */ *ap = v;
-	struct vnode *tvp, *fvp, *tdvp, *fdvp;
+	struct vnode		*tvp, *tdvp, *fvp, *fdvp;
 	struct componentname *tcnp, *fcnp;
-	int error;
-	struct lfs *fs;
+	struct inode		*ip, *txp, *fxp, *tdp, *fdp;
+	struct mount		*mp;
+	struct direct		*newdir;
+	int			doingdirectory, error, marked;
+	ino_t			oldparent, newparent;
+
+	struct ufs_lookup_results from_ulr, to_ulr;
+	struct lfs *fs = VTOI(ap->a_fvp)->i_lfs;
 
-	fs = VTOI(ap->a_fdvp)->i_lfs;
 	tvp = ap->a_tvp;
 	tdvp = ap->a_tdvp;
-	tcnp = ap->a_tcnp;
 	fvp = ap->a_fvp;
 	fdvp = ap->a_fdvp;
+	tcnp = ap->a_tcnp;
 	fcnp = ap->a_fcnp;
+	doingdirectory = error = 0;
+	oldparent = newparent = 0;
+	marked = 0;
+
+	/* save the supplemental lookup results as they currently exist */
+	from_ulr = VTOI(fdvp)->i_crap;
+	to_ulr = VTOI(tdvp)->i_crap;
+	UFS_CHECK_CRAPCOUNTER(VTOI(fdvp));
+	UFS_CHECK_CRAPCOUNTER(VTOI(tdvp));
+
+	/*
+	 * Owing to VFS oddities we are currently called with tdvp/tvp
+	 * locked and not fdvp/fvp. In a sane world we'd be passed
+	 * tdvp and fdvp only, unlocked, and two name strings. Pretend
+	 * we have a sane world and unlock tdvp and tvp.
+	 */
+	VOP_UNLOCK(tdvp);
+	if (tvp && tvp != tdvp) {
+		VOP_UNLOCK(tvp);
+	}
+
+	/* Also pretend we have a sane world and vrele fvp/tvp. */
+	vrele(fvp);
+	fvp = NULL;
+	if (tvp) {
+		vrele(tvp);
+		tvp = NULL;
+	}
 
 	/*
 	 * Check for cross-device rename.
-	 * If it is, we don't want to set dirops, just error out.
-	 * (In particular note that MARK_VNODE(tdvp) will DTWT on
-	 * a cross-device rename.)
-	 *
-	 * Copied from ufs_rename.
 	 */
-	if ((fvp->v_mount != tdvp->v_mount) ||
-	    (tvp && (fvp->v_mount != tvp->v_mount))) {
+	if (fdvp->v_mount != tdvp->v_mount) {
 		error = EXDEV;
-		goto errout;
+		goto abort;
 	}
 
 	/*
-	 * Check to make sure we're not renaming a vnode onto itself
-	 * (deleting a hard link by renaming one name onto another);
-	 * if we are we can't recursively call VOP_REMOVE since that
-	 * would leave us with an unaccounted-for number of live dirops.
+	 * Reject "." and ".."
+	 */
+	if ((fcnp->cn_flags & ISDOTDOT) || (tcnp->cn_flags & ISDOTDOT) ||
+	    (fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') ||
+	    (tcnp->cn_namelen == 1 && tcnp->cn_nameptr[0] == '.')) {
+		error = EINVAL;
+		goto abort;
+	}
+	    
+	/*
+	 * Get locks.
+	 */
+
+	/* paranoia */
+	fcnp->cn_flags |= LOCKPARENT|LOCKLEAF;
+	tcnp->cn_flags |= LOCKPARENT|LOCKLEAF;
+
+	if (fdvp == tdvp) {
+		/* One directory. Lock it and relookup both children. */
+		vn_lock(fdvp, LK_EXCLUSIVE | LK_RETRY);
+
+		if (VTOI(fdvp)->i_size == 0) {
+			/* directory has been rmdir'd */
+			VOP_UNLOCK(fdvp);
+			error = ENOENT;
+			goto abort;
+		}
+
+		error = do_relookup(fdvp, &from_ulr, &fvp, fcnp);
+		if (error == 0 && fvp == NULL) {
+			/* relookup may produce this if fvp disappears */
+			error = ENOENT;
+		}
+		if (error) {
+			VOP_UNLOCK(fdvp);
+			goto abort;
+		}
+
+		/*
+		 * The right way to do this is to look up both children
+		 * without locking either, and then lock both unless they
+		 * turn out to be the same. However, due to deep-seated
+		 * VFS-level issues all lookups lock the child regardless
+		 * of whether LOCKLEAF is set (if LOCKLEAF is not set,
+		 * the child is locked during lookup and then unlocked)
+		 * so it is not safe to look up tvp while fvp is locked.
+		 *
+		 * Unlocking fvp here temporarily is more or less safe,
+		 * because with the directory locked there's not much
+		 * that can happen to it. However, ideally it wouldn't
+		 * be necessary. XXX.
+		 */
+		VOP_UNLOCK(fvp);
+		/* remember fdvp == tdvp so tdvp is locked */
+		error = do_relookup(tdvp, &to_ulr, &tvp, tcnp);
+		if (error && error != ENOENT) {
+			VOP_UNLOCK(fdvp);
+			goto abort;
+		}
+		if (error == ENOENT) {
+			/*
+			 * Note: currently if the name doesn't exist,
+			 * relookup succeeds (it intercepts the
+			 * EJUSTRETURN from VOP_LOOKUP) and sets tvp
+			 * to NULL. Therefore, we will never get
+			 * ENOENT and this branch is not needed.
+			 * However, in a saner future the EJUSTRETURN
+			 * garbage will go away, so let's DTRT.
+			 */
+			tvp = NULL;
+		}
+
+		/* tvp is locked; lock fvp if necessary */
+		if (!tvp || tvp != fvp) {
+			vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY);
+		}
+	} else {
+		int found_fdvp;
+		struct vnode *illegal_fvp;
+
+		/*
+		 * The source must not be above the destination. (If
+		 * it were, the rename would detach a section of the
+		 * tree.)
+		 *
+		 * Look up the tree from tdvp to see if we find fdvp,
+		 * and if so, return the immediate child of fdvp we're
+		 * under; that must not turn out to be the same as
+		 * fvp.
 	 *
-	 * Inline the relevant section of ufs_rename here, *before*
-	 * calling SET_DIROP_REMOVE.
+		 * The per-volume rename lock guarantees that the
+		 * result of this check remains true until we finish
+		 * looking up and locking.
 	 */
+		error = ufs_parentcheck(fdvp, tdvp, fcnp->cn_cred,
+					&found_fdvp, &illegal_fvp);
+		if (error) {
+			goto abort;
+		}
+
+		/* Must lock in tree order. */
+
+		if (found_fdvp) {
+			/* fdvp -> fvp -> tdvp -> tvp */
+			error = lock_vnode_sequence(fdvp, &from_ulr,
+						    &fvp, fcnp, 0,
+						    EINVAL,
+						    tdvp, &to_ulr,
+						    &tvp, tcnp, 1);
+		} else {
+			/* tdvp -> tvp -> fdvp -> fvp */
+			error = lock_vnode_sequence(tdvp, &to_ulr,
+						    &tvp, tcnp, 1,
+						    ENOTEMPTY,
+						    fdvp, &from_ulr,
+						    &fvp, fcnp, 0);
+		}
+		if (error) {
+			if (illegal_fvp) {
+				vrele(illegal_fvp);
+			}
+			goto abort;
+		}
+		KASSERT(fvp != NULL);
+
+		if (illegal_fvp && fvp == illegal_fvp) {
+			vrele(illegal_fvp);
+			error = EINVAL;
+			goto abort_withlocks;
+		}
+
+		if (illegal_fvp) {
+			vrele(illegal_fvp);
+		}
+	}
+
+	KASSERT(fdvp && VOP_ISLOCKED(fdvp));
+	KASSERT(fvp && VOP_ISLOCKED(fvp));
+	KASSERT(tdvp && VOP_ISLOCKED(tdvp));
+	KASSERT(tvp == NULL || VOP_ISLOCKED(tvp));
+
+	/* --- everything is now locked --- */
+
 	if (tvp && ((VTOI(tvp)->i_flags & (IMMUTABLE | APPEND)) ||
 		    (VTOI(tdvp)->i_flags & APPEND))) {
 		error = EPERM;
-		goto errout;
+		goto abort_withlocks;
 	}
+
+	/*
+	 * Check if just deleting a link name.
+	 */
 	if (fvp == tvp) {
 		if (fvp->v_type == VDIR) {
 			error = EINVAL;
-			goto errout;
+			goto abort_withlocks;
 		}
 
-		/* Release destination completely. */
+		/* Release destination completely. Leave fdvp locked. */
 		VOP_ABORTOP(tdvp, tcnp);
-		vput(tdvp);
-		vput(tvp);
+		if (fdvp != tdvp) {
+			VOP_UNLOCK(tdvp);
+		}
+		VOP_UNLOCK(tvp);
+		vrele(tdvp);
+		vrele(tvp);
 
 		/* Delete source. */
+		/* XXX: do we really need to relookup again? */
+
+		/*
+		 * fdvp is still locked, but we just unlocked fvp
+		 * (because fvp == tvp) so just decref fvp
+		 */
 		vrele(fvp);
 		fcnp->cn_flags &= ~(MODMASK);
 		fcnp->cn_flags |= LOCKPARENT | LOCKLEAF;
 		fcnp->cn_nameiop = DELETE;
-		vn_lock(fdvp, LK_EXCLUSIVE | LK_RETRY);
 		if ((error = relookup(fdvp, &fvp, fcnp, 0))) {
 			vput(fdvp);
 			return (error);
@@ -884,28 +1243,436 @@ lfs_rename(void *v)
 		return (VOP_REMOVE(fdvp, fvp, fcnp));
 	}
 
+	/* The tiny bit of actual LFS code in this function */
 	if ((error = SET_DIROP_REMOVE(tdvp, tvp)) != 0)
-		goto errout;
+		goto abort_withlocks;
 	MARK_VNODE(fdvp);
 	MARK_VNODE(fvp);
+	marked = 1;
+
+	fdp = VTOI(fdvp);
+	ip = VTOI(fvp);
+	if ((nlink_t) ip->i_nlink >= LINK_MAX) {
+		error = EMLINK;
+		goto abort_withlocks;
+	}
+	if ((ip->i_flags & (IMMUTABLE | APPEND)) ||
+		(fdp->i_flags & APPEND)) {
+		error = EPERM;
+		goto abort_withlocks;
+	}
+	if ((ip->i_mode & IFMT) == IFDIR) {
+		/*
+		 * Avoid ".", "..", and aliases of "." for obvious reasons.
+		 */
+		if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') ||
+		    fdp == ip ||
+		    (fcnp->cn_flags & ISDOTDOT) ||
+		    (tcnp->cn_flags & ISDOTDOT) ||
+		    (ip->i_flag & IN_RENAME)) {
+			error = EINVAL;
+			goto abort_withlocks;
+		}
+		ip->i_flag |= IN_RENAME;
+		doingdirectory = 1;
+	}
+	oldparent = fdp->i_number;
+	VN_KNOTE(fdvp, NOTE_WRITE);		/* XXXLUKEM/XXX: right place? */
+
+	/*
+	 * Both the directory
+	 * and target vnodes are locked.
+	 */
+	tdp = VTOI(tdvp);
+	txp = NULL;
+	if (tvp)
+		txp = VTOI(tvp);
+
+	mp = fdvp->v_mount;
+	fstrans_start(mp, FSTRANS_SHARED);
+
+	if (oldparent != tdp->i_number)
+		newparent = tdp->i_number;
+
+	/*
+	 * If ".." must be changed (ie the directory gets a new
+	 * parent) the user must have write permission in the source
+	 * so as to be able to change "..".
+	 */
+	if (doingdirectory && newparent) {
+		error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred);
+		if (error)
+			goto out;
+	}
+
+	KASSERT(fdvp != tvp);
+
+	if (newparent) {
+		/* Check for the rename("foo/foo", "foo") case. */
+		if (fdvp == tvp) {
+			error = doingdirectory ? ENOTEMPTY : EISDIR;
+			goto out;
+		}
+	}
+
+	fxp = VTOI(fvp);
+	fdp = VTOI(fdvp);
 
-	error = ufs_rename(ap);
+	error = UFS_WAPBL_BEGIN(fdvp->v_mount);
+	if (error)
+		goto out2;
+
+	/*
+	 * 1) Bump link count while we're moving stuff
+	 *    around.  If we crash somewhere before
+	 *    completing our work, the link count
+	 *    may be wrong, but correctable.
+	 */
+	ip->i_nlink++;
+	DIP_ASSIGN(ip, nlink, ip->i_nlink);
+	ip->i_flag |= IN_CHANGE;
+	if ((error = UFS_UPDATE(fvp, NULL, NULL, UPDATE_DIROP)) != 0) {
+		goto bad;
+	}
+
+	/*
+	 * 2) If target doesn't exist, link the target
+	 *    to the source and unlink the source.
+	 *    Otherwise, rewrite the target directory
+	 *    entry to reference the source inode and
+	 *    expunge the original entry's existence.
+	 */
+	if (txp == NULL) {
+		if (tdp->i_dev != ip->i_dev)
+			panic("rename: EXDEV");
+		/*
+		 * Account for ".." in new directory.
+		 * When source and destination have the same
+		 * parent we don't fool with the link count.
+		 */
+		if (doingdirectory && newparent) {
+			if ((nlink_t)tdp->i_nlink >= LINK_MAX) {
+				error = EMLINK;
+				goto bad;
+			}
+			tdp->i_nlink++;
+			DIP_ASSIGN(tdp, nlink, tdp->i_nlink);
+			tdp->i_flag |= IN_CHANGE;
+			if ((error = UFS_UPDATE(tdvp, NULL, NULL,
+			    UPDATE_DIROP)) != 0) {
+				tdp->i_nlink--;
+				DIP_ASSIGN(tdp, nlink, tdp->i_nlink);
+				tdp->i_flag |= IN_CHANGE;
+				goto bad;
+			}
+		}
+		newdir = pool_cache_get(ufs_direct_cache, PR_WAITOK);
+		ufs_makedirentry(ip, tcnp, newdir);
+		error = ufs_direnter(tdvp, &to_ulr,
+				     NULL, newdir, tcnp, NULL);
+		pool_cache_put(ufs_direct_cache, newdir);
+		if (error != 0) {
+			if (doingdirectory && newparent) {
+				tdp->i_nlink--;
+				DIP_ASSIGN(tdp, nlink, tdp->i_nlink);
+				tdp->i_flag |= IN_CHANGE;
+				(void)UFS_UPDATE(tdvp, NULL, NULL,
+						 UPDATE_WAIT | UPDATE_DIROP);
+			}
+			goto bad;
+		}
+		VN_KNOTE(tdvp, NOTE_WRITE);
+	} else {
+		if (txp->i_dev != tdp->i_dev || txp->i_dev != ip->i_dev)
+			panic("rename: EXDEV");
+		/*
+		 * Short circuit rename(foo, foo).
+		 */
+		if (txp->i_number == ip->i_number)
+			panic("rename: same file");
+		/*
+		 * If the parent directory is "sticky", then the user must
+		 * own the parent directory, or the destination of the rename,
+		 * otherwise the destination may not be changed (except by
+		 * root). This implements append-only directories.
+		 */
+		if ((tdp->i_mode & S_ISTXT) &&
+		    kauth_authorize_generic(tcnp->cn_cred,
+		     KAUTH_GENERIC_ISSUSER, NULL) != 0 &&
+		    kauth_cred_geteuid(tcnp->cn_cred) != tdp->i_uid &&
+		    txp->i_uid != kauth_cred_geteuid(tcnp->cn_cred)) {
+			error = EPERM;
+			goto bad;
+		}
+		/*
+		 * Target must be empty if a directory and have no links
+		 * to it. Also, ensure source and target are compatible
+		 * (both directories, or both not directories).
+		 */
+		if ((txp->i_mode & IFMT) == IFDIR) {
+			if (txp->i_nlink > 2 ||
+			    !ufs_dirempty(txp, tdp->i_number, tcnp->cn_cred)) {
+				error = ENOTEMPTY;
+				goto bad;
+			}
+			if (!doingdirectory) {
+				error = ENOTDIR;
+				goto bad;
+			}
+			cache_purge(tdvp);
+		} else if (doingdirectory) {
+			error = EISDIR;
+			goto bad;
+		}
+		if ((error = ufs_dirrewrite(tdp, to_ulr.ulr_offset,
+		    txp, ip->i_number,
+		    IFTODT(ip->i_mode), doingdirectory && newparent ?
+		    newparent : doingdirectory, IN_CHANGE | IN_UPDATE)) != 0)
+			goto bad;
+		if (doingdirectory) {
+			/*
+			 * Truncate inode. The only stuff left in the directory
+			 * is "." and "..". The "." reference is inconsequential
+			 * since we are quashing it. We have removed the "."
+			 * reference and the reference in the parent directory,
+			 * but there may be other hard links.
+			 */
+			if (!newparent) {
+				tdp->i_nlink--;
+				DIP_ASSIGN(tdp, nlink, tdp->i_nlink);
+				tdp->i_flag |= IN_CHANGE;
+				UFS_WAPBL_UPDATE(tdvp, NULL, NULL, 0);
+			}
+			txp->i_nlink--;
+			DIP_ASSIGN(txp, nlink, txp->i_nlink);
+			txp->i_flag |= IN_CHANGE;
+			if ((error = UFS_TRUNCATE(tvp, (off_t)0, IO_SYNC,
+			    tcnp->cn_cred)))
+				goto bad;
+		}
+		VN_KNOTE(tdvp, NOTE_WRITE);
+		VN_KNOTE(tvp, NOTE_DELETE);
+	}
+
+	/*
+	 * Handle case where the directory entry we need to remove,
+	 * which is/was at from_ulr.ulr_offset, or the one before it,
+	 * which is/was at from_ulr.ulr_offset - from_ulr.ulr_count,
+	 * may have been moved when the directory insertion above
+	 * performed compaction.
+	 */
+	if (tdp->i_number == fdp->i_number &&
+	    ulr_overlap(&from_ulr, &to_ulr)) {
+
+		struct buf *bp;
+		struct direct *ep;
+		struct ufsmount *ump = fdp->i_ump;
+		doff_t curpos;
+		doff_t endsearch;	/* offset to end directory search */
+		uint32_t prev_reclen;
+		int dirblksiz = ump->um_dirblksiz;
+		const int needswap = UFS_MPNEEDSWAP(ump);
+		u_long bmask;
+		int namlen, entryoffsetinblock;
+		char *dirbuf;
+
+		bmask = fdvp->v_mount->mnt_stat.f_iosize - 1;
+
+		/*
+		 * The fcnp entry will be somewhere between the start of
+		 * compaction (to_ulr.ulr_offset) and the original location
+		 * (from_ulr.ulr_offset).
+		 */
+		curpos = to_ulr.ulr_offset;
+		endsearch = from_ulr.ulr_offset + from_ulr.ulr_reclen;
+		entryoffsetinblock = 0;
+
+		/*
+		 * Get the directory block containing the start of
+		 * compaction.
+		 */
+		error = ufs_blkatoff(fdvp, (off_t)to_ulr.ulr_offset, &dirbuf,
+		    &bp, false);
+		if (error)
+			goto bad;
+
+		/*
+		 * Keep existing ulr_count (length of previous record)
+		 * for the case where compaction did not include the
+		 * previous entry but started at the from-entry.
+		 */
+		prev_reclen = from_ulr.ulr_count;
+
+		while (curpos < endsearch) {
+			uint32_t reclen;
+
+			/*
+			 * If necessary, get the next directory block.
+			 *
+			 * dholland 7/13/11 to the best of my understanding
+			 * this should never happen; compaction occurs only
+			 * within single blocks. I think.
+			 */
+			if ((curpos & bmask) == 0) {
+				if (bp != NULL)
+					brelse(bp, 0);
+				error = ufs_blkatoff(fdvp, (off_t)curpos,
+				    &dirbuf, &bp, false);
+				if (error)
+					goto bad;
+				entryoffsetinblock = 0;
+			}
+
+			KASSERT(bp != NULL);
+			ep = (struct direct *)(dirbuf + entryoffsetinblock);
+			reclen = ufs_rw16(ep->d_reclen, needswap);
+
+#if (BYTE_ORDER == LITTLE_ENDIAN)
+			if (FSFMT(fdvp) && needswap == 0)
+				namlen = ep->d_type;
+			else
+				namlen = ep->d_namlen;
+#else
+			if (FSFMT(fdvp) && needswap != 0)
+				namlen = ep->d_type;
+			else
+				namlen = ep->d_namlen;
+#endif
+			if ((ep->d_ino != 0) &&
+			    (ufs_rw32(ep->d_ino, needswap) != WINO) &&
+			    (namlen == fcnp->cn_namelen) &&
+			    memcmp(ep->d_name, fcnp->cn_nameptr, namlen) == 0) {
+				from_ulr.ulr_reclen = reclen;
+				break;
+			}
+			curpos += reclen;
+			entryoffsetinblock += reclen;
+			prev_reclen = reclen;
+		}
+
+		from_ulr.ulr_offset = curpos;
+		from_ulr.ulr_count = prev_reclen;
+
+		KASSERT(curpos <= endsearch);
+
+		/*
+		 * If ulr_offset points to start of a directory block,
+		 * clear ulr_count so ufs_dirremove() doesn't try to
+		 * merge free space over a directory block boundary.
+		 */
+		if ((from_ulr.ulr_offset & (dirblksiz - 1)) == 0)
+			from_ulr.ulr_count = 0;
+
+		brelse(bp, 0);
+	}
+
+	/*
+	 * 3) Unlink the source.
+	 */
+
+#if 0
+	/*
+	 * Ensure that the directory entry still exists and has not
+	 * changed while the new name has been entered. If the source is
+	 * a file then the entry may have been unlinked or renamed. In
+	 * either case there is no further work to be done. If the source
+	 * is a directory then it cannot have been rmdir'ed; the IN_RENAME
+	 * flag ensures that it cannot be moved by another rename or removed
+	 * by a rmdir.
+	 */
+#endif
+	KASSERT(fxp == ip);
+
+	/*
+	 * If the source is a directory with a new parent, the link
+	 * count of the old parent directory must be decremented and
+	 * ".." set to point to the new parent.
+	 */
+	if (doingdirectory && newparent) {
+		KASSERT(fdp != NULL);
+		ufs_dirrewrite(fxp, mastertemplate.dot_reclen,
+			       fdp, newparent, DT_DIR, 0, IN_CHANGE);
+		cache_purge(fdvp);
+	}
+	error = ufs_dirremove(fdvp, &from_ulr,
+			      fxp, fcnp->cn_flags, 0);
+	fxp->i_flag &= ~IN_RENAME;
+
+	VN_KNOTE(fvp, NOTE_RENAME);
+	goto done;
+
+ out:
+	goto out2;
+
+	/* exit routines from steps 1 & 2 */
+ bad:
+	if (doingdirectory)
+		ip->i_flag &= ~IN_RENAME;
+	ip->i_nlink--;
+	DIP_ASSIGN(ip, nlink, ip->i_nlink);
+	ip->i_flag |= IN_CHANGE;
+	ip->i_flag &= ~IN_RENAME;
+	UFS_WAPBL_UPDATE(fvp, NULL, NULL, 0);
+ done:
+	UFS_WAPBL_END(fdvp->v_mount);
+ out2:
+	/*
+	 * clear IN_RENAME - some exit paths happen too early to go
+	 * through the cleanup done in the "bad" case above, so we
+	 * always do this mini-cleanup here.
+	 */
+	ip->i_flag &= ~IN_RENAME;
+
+	VOP_UNLOCK(fdvp);
+	if (tdvp != fdvp) {
+		VOP_UNLOCK(tdvp);
+	}
+	VOP_UNLOCK(fvp);
+	if (tvp && tvp != fvp) {
+		VOP_UNLOCK(tvp);
+	}
+
+	vrele(fdvp);
+	vrele(tdvp);
+	vrele(fvp);
+	if (tvp) {
+		vrele(tvp);
+	}
+
+	fstrans_done(mp);
+	if (marked) {
 	UNMARK_VNODE(fdvp);
 	UNMARK_VNODE(fvp);
 	SET_ENDOP_REMOVE(fs, tdvp, tvp, "rename");
+	}
 	return (error);
 
-  errout:
-	VOP_ABORTOP(tdvp, ap->a_tcnp); /* XXX, why not in NFS? */
-	if (tdvp == tvp)
+ abort_withlocks:
+	VOP_UNLOCK(fdvp);
+	if (tdvp != fdvp) {
+		VOP_UNLOCK(tdvp);
+	}
+	VOP_UNLOCK(fvp);
+	if (tvp && tvp != fvp) {
+		VOP_UNLOCK(tvp);
+	}
+
+ abort:
+	VOP_ABORTOP(fdvp, fcnp); /* XXX, why not in NFS? */
+	VOP_ABORTOP(tdvp, tcnp); /* XXX, why not in NFS? */
 		vrele(tdvp);
-	else
-		vput(tdvp);
-	if (tvp)
-		vput(tvp);
-	VOP_ABORTOP(fdvp, ap->a_fcnp); /* XXX, why not in NFS? */
+	if (tvp) {
+		vrele(tvp);
+	}
 	vrele(fdvp);
+	if (fvp) {
 	vrele(fvp);
+	}
+	if (marked) {
+		UNMARK_VNODE(fdvp);
+		UNMARK_VNODE(fvp);
+		SET_ENDOP_REMOVE(fs, tdvp, tvp, "rename");
+	}
 	return (error);
 }
 

Index: src/tests/fs/vfs/t_renamerace.c
diff -u src/tests/fs/vfs/t_renamerace.c:1.24 src/tests/fs/vfs/t_renamerace.c:1.25
--- src/tests/fs/vfs/t_renamerace.c:1.24	Sat Oct  8 13:08:54 2011
+++ src/tests/fs/vfs/t_renamerace.c	Thu Feb 16 02:47:56 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: t_renamerace.c,v 1.24 2011/10/08 13:08:54 njoly Exp $	*/
+/*	$NetBSD: t_renamerace.c,v 1.25 2012/02/16 02:47:56 perseant Exp $	*/
 
 /*
  * Modified for rump and atf from a program supplied
@@ -81,9 +81,6 @@ renamerace(const atf_tc_t *tc, const cha
 	pthread_t pt1[NWRK], pt2[NWRK];
 	int i;
 
-	if (FSTYPE_LFS(tc))
-		atf_tc_expect_signal(-1, "PR kern/43582");
-
 	if (FSTYPE_RUMPFS(tc))
 		atf_tc_skip("rename not supported by file system");
 
@@ -106,13 +103,6 @@ renamerace(const atf_tc_t *tc, const cha
 		pthread_join(pt2[i], NULL);
 	RL(rump_sys_chdir("/"));
 
-	/*
-	 * XXX: does not always fail on LFS, especially for unicpu
-	 * configurations.  see other ramblings about racy tests.
-	 */
-	if (FSTYPE_LFS(tc))
-		abort();
-
 	if (FSTYPE_MSDOS(tc)) {
 		atf_tc_expect_fail("PR kern/44661");
 		/*
@@ -139,7 +129,7 @@ renamerace_dirs(const atf_tc_t *tc, cons
 		atf_tc_skip("rename not supported by file system");
 
 	/* XXX: msdosfs also sometimes hangs */
-	if (FSTYPE_EXT2FS(tc) || FSTYPE_LFS(tc) || FSTYPE_MSDOS(tc))
+	if (FSTYPE_EXT2FS(tc) || FSTYPE_MSDOS(tc))
 		atf_tc_expect_signal(-1, "PR kern/43626");
 
 	/* XXX: unracy execution not caught */
@@ -164,7 +154,7 @@ renamerace_dirs(const atf_tc_t *tc, cons
 	 * Doesn't always trigger when run on a slow backend
 	 * (i.e. not on tmpfs/mfs).  So do the usual kludge.
 	 */
-	if (FSTYPE_EXT2FS(tc) || FSTYPE_LFS(tc) || FSTYPE_MSDOS(tc))
+	if (FSTYPE_EXT2FS(tc) || FSTYPE_MSDOS(tc))
 		abort();
 
 	if (FSTYPE_P2K_FFS(tc)) {

Index: src/tests/fs/vfs/t_rmdirrace.c
diff -u src/tests/fs/vfs/t_rmdirrace.c:1.8 src/tests/fs/vfs/t_rmdirrace.c:1.9
--- src/tests/fs/vfs/t_rmdirrace.c:1.8	Sat Oct  8 13:08:54 2011
+++ src/tests/fs/vfs/t_rmdirrace.c	Thu Feb 16 02:47:56 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: t_rmdirrace.c,v 1.8 2011/10/08 13:08:54 njoly Exp $	*/
+/*	$NetBSD: t_rmdirrace.c,v 1.9 2012/02/16 02:47:56 perseant Exp $	*/
 
 /*-
  * Copyright (c) 2010 The NetBSD Foundation, Inc.
@@ -68,8 +68,6 @@ race(const atf_tc_t *tc, const char *pat
 	int res, fd, quit;
 	pthread_t th1, th2;
 
-	if (FSTYPE_LFS(tc))
-		atf_tc_expect_signal(-1, "PR kern/43582");
 	if (FSTYPE_SYSVBFS(tc))
 		atf_tc_skip("directories not supported by file system");
 
@@ -103,14 +101,6 @@ race(const atf_tc_t *tc, const char *pat
 	res = rump_sys_fchdir(fd);
 	if (res == -1)
 		atf_tc_fail("fchdir failed");
-
-	/*
-	 * Rarely the LFS test does not crash.  atf currently has no way of
-	 * saying "just chill even if the test doesn't fail", so this
-	 * takes care of it.
-	 */
-	if (FSTYPE_LFS(tc))
-		abort();
 }
 
 ATF_FSAPPLY(race, "rmdir(2) race");

Reply via email to