Module Name: src
Committed By: bouyer
Date: Sat Mar 17 17:40:08 UTC 2012
Modified Files:
src/sbin/newfs_lfs [netbsd-6]: make_lfs.c
src/sys/ufs/lfs [netbsd-6]: lfs.h lfs_alloc.c lfs_bio.c lfs_segment.c
lfs_vfsops.c lfs_vnops.c
src/tests/fs/vfs [netbsd-6]: t_renamerace.c t_rmdirrace.c
Log Message:
Pull up following revision(s) (requested by perseant in ticket #116):
sys/ufs/lfs/lfs_alloc.c: revision 1.112
tests/fs/vfs/t_rmdirrace.c: revision 1.9
tests/fs/vfs/t_renamerace.c: revision 1.25
sys/ufs/lfs/lfs_vnops.c: revision 1.240
sys/ufs/lfs/lfs_segment.c: revision 1.224
sys/ufs/lfs/lfs_bio.c: revision 1.122
sys/ufs/lfs/lfs_vfsops.c: revision 1.294
sbin/newfs_lfs/make_lfs.c: revision 1.19
sys/ufs/lfs/lfs.h: revision 1.136
Pass t_renamerace and t_rmdirrace tests.
Adapt dholland@'s fix to ufs_rename to fix PR kern/43582. Address several
other MP locking issues discovered during the course of investigating the
same problem.
Removed extraneous vn_lock() calls on the Ifile, since the Ifile writes
are controlled by the segment lock.
Fix PR kern/45982 by deemphasizing the estimate of how much metadata
will fill the empty space on disk when the disk is nearly empty
(t_renamerace creates a lot of inode blocks on a tiny empty disk).
To generate a diff of this commit:
cvs rdiff -u -r1.18 -r1.18.2.1 src/sbin/newfs_lfs/make_lfs.c
cvs rdiff -u -r1.135 -r1.135.2.1 src/sys/ufs/lfs/lfs.h
cvs rdiff -u -r1.111 -r1.111.8.1 src/sys/ufs/lfs/lfs_alloc.c
cvs rdiff -u -r1.121 -r1.121.2.1 src/sys/ufs/lfs/lfs_bio.c
cvs rdiff -u -r1.223 -r1.223.2.1 src/sys/ufs/lfs/lfs_segment.c
cvs rdiff -u -r1.293 -r1.293.2.1 src/sys/ufs/lfs/lfs_vfsops.c
cvs rdiff -u -r1.239 -r1.239.2.1 src/sys/ufs/lfs/lfs_vnops.c
cvs rdiff -u -r1.24 -r1.24.4.1 src/tests/fs/vfs/t_renamerace.c
cvs rdiff -u -r1.8 -r1.8.4.1 src/tests/fs/vfs/t_rmdirrace.c
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/sbin/newfs_lfs/make_lfs.c
diff -u src/sbin/newfs_lfs/make_lfs.c:1.18 src/sbin/newfs_lfs/make_lfs.c:1.18.2.1
--- src/sbin/newfs_lfs/make_lfs.c:1.18 Thu Feb 2 03:50:32 2012
+++ src/sbin/newfs_lfs/make_lfs.c Sat Mar 17 17:40:08 2012
@@ -1,4 +1,4 @@
-/* $NetBSD: make_lfs.c,v 1.18 2012/02/02 03:50:32 perseant Exp $ */
+/* $NetBSD: make_lfs.c,v 1.18.2.1 2012/03/17 17:40:08 bouyer Exp $ */
/*-
* Copyright (c) 2003 The NetBSD Foundation, Inc.
@@ -62,7 +62,7 @@
#if 0
static char sccsid[] = "@(#)lfs.c 8.5 (Berkeley) 5/24/95";
#else
-__RCSID("$NetBSD: make_lfs.c,v 1.18 2012/02/02 03:50:32 perseant Exp $");
+__RCSID("$NetBSD: make_lfs.c,v 1.18.2.1 2012/03/17 17:40:08 bouyer Exp $");
#endif
#endif /* not lint */
@@ -496,7 +496,7 @@ make_lfs(int devfd, uint secsize, struct
if (fs->lfs_resvseg < MIN_RESV_SEGS)
fs->lfs_resvseg = MIN_RESV_SEGS;
- if(fs->lfs_nseg < (3 * CM_MAG_NUM * fs->lfs_minfreeseg) / CM_MAG_DEN + 1
+ if(fs->lfs_nseg < (4 * fs->lfs_minfreeseg)
|| fs->lfs_nseg < LFS_MIN_SBINTERVAL + 1)
{
if(seg_size == 0 && ssize > (bsize<<1)) {
Index: src/sys/ufs/lfs/lfs.h
diff -u src/sys/ufs/lfs/lfs.h:1.135 src/sys/ufs/lfs/lfs.h:1.135.2.1
--- src/sys/ufs/lfs/lfs.h:1.135 Mon Jan 2 22:10:44 2012
+++ src/sys/ufs/lfs/lfs.h Sat Mar 17 17:40:07 2012
@@ -1,4 +1,4 @@
-/* $NetBSD: lfs.h,v 1.135 2012/01/02 22:10:44 perseant Exp $ */
+/* $NetBSD: lfs.h,v 1.135.2.1 2012/03/17 17:40:07 bouyer Exp $ */
/*-
* Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
@@ -1027,13 +1027,23 @@ struct lfs_inode_ext {
/*
* Estimate number of clean blocks not available for writing because
* they will contain metadata or overhead. This is calculated as
- * (dmeta / # dirty segments) * (# clean segments).
+ *
+ * E = ((C * M / D) * D + (0) * (T - D)) / T
+ * or more simply
+ * E = (C * M) / T
+ *
+ * where
+ * C is the clean space,
+ * D is the dirty space,
+ * M is the dirty metadata, and
+ * T = C + D is the total space on disk.
+ *
+ * This approximates the old formula of E = C * M / D when D is close to T,
+ * but avoids falsely reporting "disk full" when the sample size (D) is small.
*/
-#define CM_MAG_NUM 3
-#define CM_MAG_DEN 2
#define LFS_EST_CMETA(F) (int32_t)(( \
- (CM_MAG_NUM * ((F)->lfs_dmeta * (int64_t)(F)->lfs_nclean)) / \
- (CM_MAG_DEN * ((F)->lfs_nseg - (F)->lfs_nclean))))
+ ((F)->lfs_dmeta * (int64_t)(F)->lfs_nclean) / \
+ ((F)->lfs_nseg)))
/* Estimate total size of the disk not including metadata */
#define LFS_EST_NONMETA(F) ((F)->lfs_dsize - (F)->lfs_dmeta - LFS_EST_CMETA(F))
Index: src/sys/ufs/lfs/lfs_alloc.c
diff -u src/sys/ufs/lfs/lfs_alloc.c:1.111 src/sys/ufs/lfs/lfs_alloc.c:1.111.8.1
--- src/sys/ufs/lfs/lfs_alloc.c:1.111 Sun Jun 12 03:36:01 2011
+++ src/sys/ufs/lfs/lfs_alloc.c Sat Mar 17 17:40:06 2012
@@ -1,4 +1,4 @@
-/* $NetBSD: lfs_alloc.c,v 1.111 2011/06/12 03:36:01 rmind Exp $ */
+/* $NetBSD: lfs_alloc.c,v 1.111.8.1 2012/03/17 17:40:06 bouyer Exp $ */
/*-
* Copyright (c) 1999, 2000, 2001, 2002, 2003, 2007 The NetBSD Foundation, Inc.
@@ -60,7 +60,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: lfs_alloc.c,v 1.111 2011/06/12 03:36:01 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: lfs_alloc.c,v 1.111.8.1 2012/03/17 17:40:06 bouyer Exp $");
#if defined(_KERNEL_OPT)
#include "opt_quota.h"
@@ -207,7 +207,6 @@ lfs_valloc(struct vnode *pvp, int mode,
ASSERT_NO_SEGLOCK(fs);
lfs_seglock(fs, SEGM_PROT);
- vn_lock(fs->lfs_ivnode, LK_EXCLUSIVE);
/* Get the head of the freelist. */
LFS_GET_HEADFREE(fs, cip, cbp, &new_ino);
@@ -236,7 +235,6 @@ lfs_valloc(struct vnode *pvp, int mode,
if (fs->lfs_freehd == LFS_UNUSED_INUM) {
if ((error = lfs_extend_ifile(fs, cred)) != 0) {
LFS_PUT_HEADFREE(fs, cip, cbp, new_ino);
- VOP_UNLOCK(fs->lfs_ivnode);
lfs_segunlock(fs);
return error;
}
@@ -252,7 +250,6 @@ lfs_valloc(struct vnode *pvp, int mode,
mutex_exit(&lfs_lock);
++fs->lfs_nfiles;
- VOP_UNLOCK(fs->lfs_ivnode);
lfs_segunlock(fs);
return lfs_ialloc(fs, pvp, new_ino, new_gen, vpp);
@@ -440,7 +437,6 @@ lfs_vfree(struct vnode *vp, ino_t ino, i
mutex_exit(vp->v_interlock);
lfs_seglock(fs, SEGM_PROT);
- vn_lock(fs->lfs_ivnode, LK_EXCLUSIVE);
lfs_unmark_vnode(vp);
mutex_enter(&lfs_lock);
@@ -575,7 +571,6 @@ lfs_vfree(struct vnode *vp, ino_t ino, i
mutex_exit(&lfs_lock);
--fs->lfs_nfiles;
- VOP_UNLOCK(fs->lfs_ivnode);
lfs_segunlock(fs);
return (0);
Index: src/sys/ufs/lfs/lfs_bio.c
diff -u src/sys/ufs/lfs/lfs_bio.c:1.121 src/sys/ufs/lfs/lfs_bio.c:1.121.2.1
--- src/sys/ufs/lfs/lfs_bio.c:1.121 Mon Jan 2 22:10:44 2012
+++ src/sys/ufs/lfs/lfs_bio.c Sat Mar 17 17:40:07 2012
@@ -1,4 +1,4 @@
-/* $NetBSD: lfs_bio.c,v 1.121 2012/01/02 22:10:44 perseant Exp $ */
+/* $NetBSD: lfs_bio.c,v 1.121.2.1 2012/03/17 17:40:07 bouyer Exp $ */
/*-
* Copyright (c) 1999, 2000, 2001, 2002, 2003, 2008 The NetBSD Foundation, Inc.
@@ -60,7 +60,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: lfs_bio.c,v 1.121 2012/01/02 22:10:44 perseant Exp $");
+__KERNEL_RCSID(0, "$NetBSD: lfs_bio.c,v 1.121.2.1 2012/03/17 17:40:07 bouyer Exp $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -151,12 +151,15 @@ static int
lfs_reservebuf(struct lfs *fs, struct vnode *vp,
struct vnode *vp2, int n, int bytes)
{
+ int cantwait;
+
ASSERT_MAYBE_SEGLOCK(fs);
KASSERT(locked_queue_rcount >= 0);
KASSERT(locked_queue_rbytes >= 0);
+ cantwait = (VTOI(vp)->i_flag & IN_ADIROP) || fs->lfs_unlockvp == vp;
mutex_enter(&lfs_lock);
- while (n > 0 && !lfs_fits_buf(fs, n, bytes)) {
+ while (!cantwait && n > 0 && !lfs_fits_buf(fs, n, bytes)) {
int error;
lfs_flush(fs, 0, 0);
@@ -213,28 +216,15 @@ lfs_reserveavail(struct lfs *fs, struct
CLEANERINFO *cip;
struct buf *bp;
int error, slept;
+ int cantwait;
ASSERT_MAYBE_SEGLOCK(fs);
slept = 0;
mutex_enter(&lfs_lock);
- while (fsb > 0 && !lfs_fits(fs, fsb + fs->lfs_ravail + fs->lfs_favail)) {
+ cantwait = (VTOI(vp)->i_flag & IN_ADIROP) || fs->lfs_unlockvp == vp;
+ while (!cantwait && fsb > 0 &&
+ !lfs_fits(fs, fsb + fs->lfs_ravail + fs->lfs_favail)) {
mutex_exit(&lfs_lock);
-#if 0
- /*
- * XXX ideally, we should unlock vnodes here
- * because we might sleep very long time.
- */
- VOP_UNLOCK(vp);
- if (vp2 != NULL) {
- VOP_UNLOCK(vp2);
- }
-#else
- /*
- * XXX since we'll sleep for cleaner with vnode lock holding,
- * deadlock will occur if cleaner tries to lock the vnode.
- * (eg. lfs_markv -> lfs_fastvget -> getnewvnode -> vclean)
- */
-#endif
if (!slept) {
DLOG((DLOG_AVAIL, "lfs_reserve: waiting for %ld (bfree = %d,"
@@ -256,10 +246,6 @@ lfs_reserveavail(struct lfs *fs, struct
error = mtsleep(&fs->lfs_avail, PCATCH | PUSER, "lfs_reserve",
0, &lfs_lock);
-#if 0
- vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX use lockstatus */
- vn_lock(vp2, LK_EXCLUSIVE | LK_RETRY); /* XXX use lockstatus */
-#endif
if (error) {
mutex_exit(&lfs_lock);
return error;
@@ -285,7 +271,6 @@ int
lfs_reserve(struct lfs *fs, struct vnode *vp, struct vnode *vp2, int fsb)
{
int error;
- int cantwait;
ASSERT_MAYBE_SEGLOCK(fs);
if (vp2) {
@@ -300,30 +285,18 @@ lfs_reserve(struct lfs *fs, struct vnode
KASSERT(fsb < 0 || VOP_ISLOCKED(vp));
KASSERT(vp2 == NULL || fsb < 0 || VOP_ISLOCKED(vp2));
- KASSERT(vp2 == NULL || !(VTOI(vp2)->i_flag & IN_ADIROP));
KASSERT(vp2 == NULL || vp2 != fs->lfs_unlockvp);
- cantwait = (VTOI(vp)->i_flag & IN_ADIROP) || fs->lfs_unlockvp == vp;
#ifdef DIAGNOSTIC
- if (cantwait) {
- if (fsb > 0)
- lfs_rescountdirop++;
- else if (fsb < 0)
- lfs_rescountdirop--;
- if (lfs_rescountdirop < 0)
- panic("lfs_rescountdirop");
- }
- else {
- if (fsb > 0)
- lfs_rescount++;
- else if (fsb < 0)
- lfs_rescount--;
- if (lfs_rescount < 0)
- panic("lfs_rescount");
- }
+ mutex_enter(&lfs_lock);
+ if (fsb > 0)
+ lfs_rescount++;
+ else if (fsb < 0)
+ lfs_rescount--;
+ if (lfs_rescount < 0)
+ panic("lfs_rescount");
+ mutex_exit(&lfs_lock);
#endif
- if (cantwait)
- return 0;
/*
* XXX
Index: src/sys/ufs/lfs/lfs_segment.c
diff -u src/sys/ufs/lfs/lfs_segment.c:1.223 src/sys/ufs/lfs/lfs_segment.c:1.223.2.1
--- src/sys/ufs/lfs/lfs_segment.c:1.223 Mon Jan 2 22:10:44 2012
+++ src/sys/ufs/lfs/lfs_segment.c Sat Mar 17 17:40:06 2012
@@ -1,4 +1,4 @@
-/* $NetBSD: lfs_segment.c,v 1.223 2012/01/02 22:10:44 perseant Exp $ */
+/* $NetBSD: lfs_segment.c,v 1.223.2.1 2012/03/17 17:40:06 bouyer Exp $ */
/*-
* Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
@@ -60,7 +60,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: lfs_segment.c,v 1.223 2012/01/02 22:10:44 perseant Exp $");
+__KERNEL_RCSID(0, "$NetBSD: lfs_segment.c,v 1.223.2.1 2012/03/17 17:40:06 bouyer Exp $");
#ifdef DEBUG
# define vndebug(vp, str) do { \
@@ -734,7 +734,6 @@ lfs_segwrite(struct mount *mp, int flags
did_ckp = 0;
if (do_ckp || fs->lfs_doifile) {
vp = fs->lfs_ivnode;
- vn_lock(vp, LK_EXCLUSIVE);
loopcount = 0;
do {
#ifdef DEBUG
@@ -807,7 +806,6 @@ lfs_segwrite(struct mount *mp, int flags
}
#endif
mutex_exit(vp->v_interlock);
- VOP_UNLOCK(vp);
} else {
(void) lfs_writeseg(fs, sp);
}
@@ -2603,8 +2601,8 @@ lfs_cluster_aiodone(struct buf *bp)
* XXX KS - Shouldn't we set *both* if both types
* of blocks are present (traverse the dirty list?)
*/
- mutex_enter(&lfs_lock);
mutex_enter(vp->v_interlock);
+ mutex_enter(&lfs_lock);
if (vp != devvp && vp->v_numoutput == 0 &&
(fbp = LIST_FIRST(&vp->v_dirtyblkhd)) != NULL) {
ip = VTOI(vp);
@@ -2616,8 +2614,8 @@ lfs_cluster_aiodone(struct buf *bp)
LFS_SET_UINO(ip, IN_MODIFIED);
}
cv_broadcast(&vp->v_cv);
- mutex_exit(vp->v_interlock);
mutex_exit(&lfs_lock);
+ mutex_exit(vp->v_interlock);
}
/* Fix up the cluster buffer, and release it */
Index: src/sys/ufs/lfs/lfs_vfsops.c
diff -u src/sys/ufs/lfs/lfs_vfsops.c:1.293 src/sys/ufs/lfs/lfs_vfsops.c:1.293.2.1
--- src/sys/ufs/lfs/lfs_vfsops.c:1.293 Wed Jan 4 02:48:58 2012
+++ src/sys/ufs/lfs/lfs_vfsops.c Sat Mar 17 17:40:07 2012
@@ -1,4 +1,4 @@
-/* $NetBSD: lfs_vfsops.c,v 1.293 2012/01/04 02:48:58 perseant Exp $ */
+/* $NetBSD: lfs_vfsops.c,v 1.293.2.1 2012/03/17 17:40:07 bouyer Exp $ */
/*-
* Copyright (c) 1999, 2000, 2001, 2002, 2003, 2007, 2007
@@ -61,7 +61,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: lfs_vfsops.c,v 1.293 2012/01/04 02:48:58 perseant Exp $");
+__KERNEL_RCSID(0, "$NetBSD: lfs_vfsops.c,v 1.293.2.1 2012/03/17 17:40:07 bouyer Exp $");
#if defined(_KERNEL_OPT)
#include "opt_lfs.h"
@@ -2089,7 +2089,6 @@ lfs_resize_fs(struct lfs *fs, int newnse
* (XXX this could be done better.)
*/
rw_enter(&fs->lfs_iflock, RW_WRITER);
- vn_lock(ivp, LK_EXCLUSIVE | LK_RETRY);
for (i = 0; i < ilast; i++) {
bread(ivp, i, fs->lfs_bsize, NOCRED, 0, &bp);
brelse(bp, 0);
@@ -2205,7 +2204,6 @@ lfs_resize_fs(struct lfs *fs, int newnse
VOP_BWRITE(bp->b_vp, bp);
/* Let Ifile accesses proceed */
- VOP_UNLOCK(ivp);
rw_exit(&fs->lfs_iflock);
out:
Index: src/sys/ufs/lfs/lfs_vnops.c
diff -u src/sys/ufs/lfs/lfs_vnops.c:1.239 src/sys/ufs/lfs/lfs_vnops.c:1.239.2.1
--- src/sys/ufs/lfs/lfs_vnops.c:1.239 Mon Jan 2 22:10:45 2012
+++ src/sys/ufs/lfs/lfs_vnops.c Sat Mar 17 17:40:06 2012
@@ -1,4 +1,4 @@
-/* $NetBSD: lfs_vnops.c,v 1.239 2012/01/02 22:10:45 perseant Exp $ */
+/* $NetBSD: lfs_vnops.c,v 1.239.2.1 2012/03/17 17:40:06 bouyer Exp $ */
/*-
* Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
@@ -60,7 +60,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: lfs_vnops.c,v 1.239 2012/01/02 22:10:45 perseant Exp $");
+__KERNEL_RCSID(0, "$NetBSD: lfs_vnops.c,v 1.239.2.1 2012/03/17 17:40:06 bouyer Exp $");
#ifdef _KERNEL_OPT
#include "opt_compat_netbsd.h"
@@ -91,6 +91,7 @@ __KERNEL_RCSID(0, "$NetBSD: lfs_vnops.c,
#include <ufs/ufs/inode.h>
#include <ufs/ufs/dir.h>
#include <ufs/ufs/ufsmount.h>
+#include <ufs/ufs/ufs_bswap.h>
#include <ufs/ufs/ufs_extern.h>
#include <uvm/uvm.h>
@@ -437,7 +438,6 @@ lfs_set_dirop(struct vnode *dvp, struct
}
if (lfs_dirvcount > LFS_MAX_DIROP) {
- mutex_exit(&lfs_lock);
DLOG((DLOG_DIROP, "lfs_set_dirop: sleeping with dirops=%d, "
"dirvcount=%d\n", fs->lfs_dirops, lfs_dirvcount));
if ((error = mtsleep(&lfs_dirvcount,
@@ -554,9 +554,11 @@ lfs_mark_vnode(struct vnode *vp)
mutex_enter(&lfs_lock);
if (!(ip->i_flag & IN_ADIROP)) {
if (!(vp->v_uflag & VU_DIROP)) {
+ mutex_exit(&lfs_lock);
mutex_enter(vp->v_interlock);
if (lfs_vref(vp) != 0)
panic("lfs_mark_vnode: could not vref");
+ mutex_enter(&lfs_lock);
++lfs_dirvcount;
++fs->lfs_dirvcount;
TAILQ_INSERT_TAIL(&fs->lfs_dchainhd, ip, i_lfs_dchain);
@@ -575,13 +577,13 @@ lfs_unmark_vnode(struct vnode *vp)
{
struct inode *ip = VTOI(vp);
+ mutex_enter(&lfs_lock);
if (ip && (ip->i_flag & IN_ADIROP)) {
KASSERT(vp->v_uflag & VU_DIROP);
- mutex_enter(&lfs_lock);
--ip->i_lfs->lfs_nadirop;
- mutex_exit(&lfs_lock);
ip->i_flag &= ~IN_ADIROP;
}
+ mutex_exit(&lfs_lock);
}
int
@@ -808,6 +810,188 @@ lfs_link(void *v)
return (error);
}
+/* XXX following lifted from ufs_lookup.c */
+#define FSFMT(vp) (((vp)->v_mount->mnt_iflag & IMNT_DTYPE) == 0)
+
+/*
+ * Check if either entry referred to by FROM_ULR is within the range
+ * of entries named by TO_ULR.
+ */
+static int
+ulr_overlap(const struct ufs_lookup_results *from_ulr,
+ const struct ufs_lookup_results *to_ulr)
+{
+ doff_t from_start, from_prevstart;
+ doff_t to_start, to_end;
+
+ /*
+ * FROM is a DELETE result; offset points to the entry to
+ * remove and subtracting count gives the previous entry.
+ */
+ from_start = from_ulr->ulr_offset - from_ulr->ulr_count;
+ from_prevstart = from_ulr->ulr_offset;
+
+ /*
+ * TO is a RENAME (thus non-DELETE) result; offset points
+ * to the beginning of a region to write in, and adding
+ * count gives the end of the region.
+ */
+ to_start = to_ulr->ulr_offset;
+ to_end = to_ulr->ulr_offset + to_ulr->ulr_count;
+
+ if (from_prevstart >= to_start && from_prevstart < to_end) {
+ return 1;
+ }
+ if (from_start >= to_start && from_start < to_end) {
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * A virgin directory (no blushing please).
+ */
+static const struct dirtemplate mastertemplate = {
+ 0, 12, DT_DIR, 1, ".",
+ 0, DIRBLKSIZ - 12, DT_DIR, 2, ".."
+};
+
+/*
+ * Wrapper for relookup that also updates the supplemental results.
+ */
+static int
+do_relookup(struct vnode *dvp, struct ufs_lookup_results *ulr,
+ struct vnode **vp, struct componentname *cnp)
+{
+ int error;
+
+ error = relookup(dvp, vp, cnp, 0);
+ if (error) {
+ return error;
+ }
+ /* update the supplemental results */
+ *ulr = VTOI(dvp)->i_crap;
+ UFS_CHECK_CRAPCOUNTER(VTOI(dvp));
+ return 0;
+}
+
+/*
+ * Lock and relookup a sequence of two directories and two children.
+ *
+ */
+static int
+lock_vnode_sequence(struct vnode *d1, struct ufs_lookup_results *ulr1,
+ struct vnode **v1_ret, struct componentname *cn1,
+ int v1_missing_ok,
+ int overlap_error,
+ struct vnode *d2, struct ufs_lookup_results *ulr2,
+ struct vnode **v2_ret, struct componentname *cn2,
+ int v2_missing_ok)
+{
+ struct vnode *v1, *v2;
+ int error;
+
+ KASSERT(d1 != d2);
+
+ vn_lock(d1, LK_EXCLUSIVE | LK_RETRY);
+ if (VTOI(d1)->i_size == 0) {
+ /* d1 has been rmdir'd */
+ VOP_UNLOCK(d1);
+ return ENOENT;
+ }
+ error = do_relookup(d1, ulr1, &v1, cn1);
+ if (v1_missing_ok) {
+ if (error == ENOENT) {
+ /*
+ * Note: currently if the name doesn't exist,
+ * relookup succeeds (it intercepts the
+ * EJUSTRETURN from VOP_LOOKUP) and sets tvp
+ * to NULL. Therefore, we will never get
+ * ENOENT and this branch is not needed.
+ * However, in a saner future the EJUSTRETURN
+ * garbage will go away, so let's DTRT.
+ */
+ v1 = NULL;
+ error = 0;
+ }
+ } else {
+ if (error == 0 && v1 == NULL) {
+ /* This is what relookup sets if v1 disappeared. */
+ error = ENOENT;
+ }
+ }
+ if (error) {
+ VOP_UNLOCK(d1);
+ return error;
+ }
+ if (v1 && v1 == d2) {
+ VOP_UNLOCK(d1);
+ VOP_UNLOCK(v1);
+ vrele(v1);
+ return overlap_error;
+ }
+
+ /*
+ * The right way to do this is to do lookups without locking
+ * the results, and lock the results afterwards; then at the
+ * end we can avoid trying to lock v2 if v2 == v1.
+ *
+ * However, for the reasons described in the fdvp == tdvp case
+ * in rename below, we can't do that safely. So, in the case
+ * where v1 is not a directory, unlock it and lock it again
+ * afterwards. This is safe in locking order because a
+ * non-directory can't be above anything else in the tree. If
+ * v1 *is* a directory, that's not true, but then because d1
+ * != d2, v1 != v2.
+ */
+ if (v1 && v1->v_type != VDIR) {
+ VOP_UNLOCK(v1);
+ }
+ vn_lock(d2, LK_EXCLUSIVE | LK_RETRY);
+ if (VTOI(d2)->i_size == 0) {
+ /* d2 has been rmdir'd */
+ VOP_UNLOCK(d2);
+ if (v1 && v1->v_type == VDIR) {
+ VOP_UNLOCK(v1);
+ }
+ VOP_UNLOCK(d1);
+ if (v1) {
+ vrele(v1);
+ }
+ return ENOENT;
+ }
+ error = do_relookup(d2, ulr2, &v2, cn2);
+ if (v2_missing_ok) {
+ if (error == ENOENT) {
+ /* as above */
+ v2 = NULL;
+ error = 0;
+ }
+ } else {
+ if (error == 0 && v2 == NULL) {
+ /* This is what relookup sets if v2 disappeared. */
+ error = ENOENT;
+ }
+ }
+ if (error) {
+ VOP_UNLOCK(d2);
+ if (v1 && v1->v_type == VDIR) {
+ VOP_UNLOCK(v1);
+ }
+ VOP_UNLOCK(d1);
+ if (v1) {
+ vrele(v1);
+ }
+ return error;
+ }
+ if (v1 && v1->v_type != VDIR && v1 != v2) {
+ vn_lock(v1, LK_EXCLUSIVE | LK_RETRY);
+ }
+ *v1_ret = v1;
+ *v2_ret = v2;
+ return 0;
+}
+
int
lfs_rename(void *v)
{
@@ -819,64 +1003,239 @@ lfs_rename(void *v)
struct vnode *a_tvp;
struct componentname *a_tcnp;
} */ *ap = v;
- struct vnode *tvp, *fvp, *tdvp, *fdvp;
+ struct vnode *tvp, *tdvp, *fvp, *fdvp;
struct componentname *tcnp, *fcnp;
- int error;
- struct lfs *fs;
+ struct inode *ip, *txp, *fxp, *tdp, *fdp;
+ struct mount *mp;
+ struct direct *newdir;
+ int doingdirectory, error, marked;
+ ino_t oldparent, newparent;
+
+ struct ufs_lookup_results from_ulr, to_ulr;
+ struct lfs *fs = VTOI(ap->a_fvp)->i_lfs;
- fs = VTOI(ap->a_fdvp)->i_lfs;
tvp = ap->a_tvp;
tdvp = ap->a_tdvp;
- tcnp = ap->a_tcnp;
fvp = ap->a_fvp;
fdvp = ap->a_fdvp;
+ tcnp = ap->a_tcnp;
fcnp = ap->a_fcnp;
+ doingdirectory = error = 0;
+ oldparent = newparent = 0;
+ marked = 0;
+
+ /* save the supplemental lookup results as they currently exist */
+ from_ulr = VTOI(fdvp)->i_crap;
+ to_ulr = VTOI(tdvp)->i_crap;
+ UFS_CHECK_CRAPCOUNTER(VTOI(fdvp));
+ UFS_CHECK_CRAPCOUNTER(VTOI(tdvp));
+
+ /*
+ * Owing to VFS oddities we are currently called with tdvp/tvp
+ * locked and not fdvp/fvp. In a sane world we'd be passed
+ * tdvp and fdvp only, unlocked, and two name strings. Pretend
+ * we have a sane world and unlock tdvp and tvp.
+ */
+ VOP_UNLOCK(tdvp);
+ if (tvp && tvp != tdvp) {
+ VOP_UNLOCK(tvp);
+ }
+
+ /* Also pretend we have a sane world and vrele fvp/tvp. */
+ vrele(fvp);
+ fvp = NULL;
+ if (tvp) {
+ vrele(tvp);
+ tvp = NULL;
+ }
/*
* Check for cross-device rename.
- * If it is, we don't want to set dirops, just error out.
- * (In particular note that MARK_VNODE(tdvp) will DTWT on
- * a cross-device rename.)
- *
- * Copied from ufs_rename.
*/
- if ((fvp->v_mount != tdvp->v_mount) ||
- (tvp && (fvp->v_mount != tvp->v_mount))) {
+ if (fdvp->v_mount != tdvp->v_mount) {
error = EXDEV;
- goto errout;
+ goto abort;
}
/*
- * Check to make sure we're not renaming a vnode onto itself
- * (deleting a hard link by renaming one name onto another);
- * if we are we can't recursively call VOP_REMOVE since that
- * would leave us with an unaccounted-for number of live dirops.
+ * Reject "." and ".."
+ */
+ if ((fcnp->cn_flags & ISDOTDOT) || (tcnp->cn_flags & ISDOTDOT) ||
+ (fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') ||
+ (tcnp->cn_namelen == 1 && tcnp->cn_nameptr[0] == '.')) {
+ error = EINVAL;
+ goto abort;
+ }
+
+ /*
+ * Get locks.
+ */
+
+ /* paranoia */
+ fcnp->cn_flags |= LOCKPARENT|LOCKLEAF;
+ tcnp->cn_flags |= LOCKPARENT|LOCKLEAF;
+
+ if (fdvp == tdvp) {
+ /* One directory. Lock it and relookup both children. */
+ vn_lock(fdvp, LK_EXCLUSIVE | LK_RETRY);
+
+ if (VTOI(fdvp)->i_size == 0) {
+ /* directory has been rmdir'd */
+ VOP_UNLOCK(fdvp);
+ error = ENOENT;
+ goto abort;
+ }
+
+ error = do_relookup(fdvp, &from_ulr, &fvp, fcnp);
+ if (error == 0 && fvp == NULL) {
+ /* relookup may produce this if fvp disappears */
+ error = ENOENT;
+ }
+ if (error) {
+ VOP_UNLOCK(fdvp);
+ goto abort;
+ }
+
+ /*
+ * The right way to do this is to look up both children
+ * without locking either, and then lock both unless they
+ * turn out to be the same. However, due to deep-seated
+ * VFS-level issues all lookups lock the child regardless
+ * of whether LOCKLEAF is set (if LOCKLEAF is not set,
+ * the child is locked during lookup and then unlocked)
+ * so it is not safe to look up tvp while fvp is locked.
+ *
+ * Unlocking fvp here temporarily is more or less safe,
+ * because with the directory locked there's not much
+ * that can happen to it. However, ideally it wouldn't
+ * be necessary. XXX.
+ */
+ VOP_UNLOCK(fvp);
+ /* remember fdvp == tdvp so tdvp is locked */
+ error = do_relookup(tdvp, &to_ulr, &tvp, tcnp);
+ if (error && error != ENOENT) {
+ VOP_UNLOCK(fdvp);
+ goto abort;
+ }
+ if (error == ENOENT) {
+ /*
+ * Note: currently if the name doesn't exist,
+ * relookup succeeds (it intercepts the
+ * EJUSTRETURN from VOP_LOOKUP) and sets tvp
+ * to NULL. Therefore, we will never get
+ * ENOENT and this branch is not needed.
+ * However, in a saner future the EJUSTRETURN
+ * garbage will go away, so let's DTRT.
+ */
+ tvp = NULL;
+ }
+
+ /* tvp is locked; lock fvp if necessary */
+ if (!tvp || tvp != fvp) {
+ vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY);
+ }
+ } else {
+ int found_fdvp;
+ struct vnode *illegal_fvp;
+
+ /*
+ * The source must not be above the destination. (If
+ * it were, the rename would detach a section of the
+ * tree.)
+ *
+ * Look up the tree from tdvp to see if we find fdvp,
+ * and if so, return the immediate child of fdvp we're
+ * under; that must not turn out to be the same as
+ * fvp.
*
- * Inline the relevant section of ufs_rename here, *before*
- * calling SET_DIROP_REMOVE.
+ * The per-volume rename lock guarantees that the
+ * result of this check remains true until we finish
+ * looking up and locking.
*/
+ error = ufs_parentcheck(fdvp, tdvp, fcnp->cn_cred,
+ &found_fdvp, &illegal_fvp);
+ if (error) {
+ goto abort;
+ }
+
+ /* Must lock in tree order. */
+
+ if (found_fdvp) {
+ /* fdvp -> fvp -> tdvp -> tvp */
+ error = lock_vnode_sequence(fdvp, &from_ulr,
+ &fvp, fcnp, 0,
+ EINVAL,
+ tdvp, &to_ulr,
+ &tvp, tcnp, 1);
+ } else {
+ /* tdvp -> tvp -> fdvp -> fvp */
+ error = lock_vnode_sequence(tdvp, &to_ulr,
+ &tvp, tcnp, 1,
+ ENOTEMPTY,
+ fdvp, &from_ulr,
+ &fvp, fcnp, 0);
+ }
+ if (error) {
+ if (illegal_fvp) {
+ vrele(illegal_fvp);
+ }
+ goto abort;
+ }
+ KASSERT(fvp != NULL);
+
+ if (illegal_fvp && fvp == illegal_fvp) {
+ vrele(illegal_fvp);
+ error = EINVAL;
+ goto abort_withlocks;
+ }
+
+ if (illegal_fvp) {
+ vrele(illegal_fvp);
+ }
+ }
+
+ KASSERT(fdvp && VOP_ISLOCKED(fdvp));
+ KASSERT(fvp && VOP_ISLOCKED(fvp));
+ KASSERT(tdvp && VOP_ISLOCKED(tdvp));
+ KASSERT(tvp == NULL || VOP_ISLOCKED(tvp));
+
+ /* --- everything is now locked --- */
+
if (tvp && ((VTOI(tvp)->i_flags & (IMMUTABLE | APPEND)) ||
(VTOI(tdvp)->i_flags & APPEND))) {
error = EPERM;
- goto errout;
+ goto abort_withlocks;
}
+
+ /*
+ * Check if just deleting a link name.
+ */
if (fvp == tvp) {
if (fvp->v_type == VDIR) {
error = EINVAL;
- goto errout;
+ goto abort_withlocks;
}
- /* Release destination completely. */
+ /* Release destination completely. Leave fdvp locked. */
VOP_ABORTOP(tdvp, tcnp);
- vput(tdvp);
- vput(tvp);
+ if (fdvp != tdvp) {
+ VOP_UNLOCK(tdvp);
+ }
+ VOP_UNLOCK(tvp);
+ vrele(tdvp);
+ vrele(tvp);
/* Delete source. */
+ /* XXX: do we really need to relookup again? */
+
+ /*
+ * fdvp is still locked, but we just unlocked fvp
+ * (because fvp == tvp) so just decref fvp
+ */
vrele(fvp);
fcnp->cn_flags &= ~(MODMASK);
fcnp->cn_flags |= LOCKPARENT | LOCKLEAF;
fcnp->cn_nameiop = DELETE;
- vn_lock(fdvp, LK_EXCLUSIVE | LK_RETRY);
if ((error = relookup(fdvp, &fvp, fcnp, 0))) {
vput(fdvp);
return (error);
@@ -884,28 +1243,436 @@ lfs_rename(void *v)
return (VOP_REMOVE(fdvp, fvp, fcnp));
}
+ /* The tiny bit of actual LFS code in this function */
if ((error = SET_DIROP_REMOVE(tdvp, tvp)) != 0)
- goto errout;
+ goto abort_withlocks;
MARK_VNODE(fdvp);
MARK_VNODE(fvp);
+ marked = 1;
+
+ fdp = VTOI(fdvp);
+ ip = VTOI(fvp);
+ if ((nlink_t) ip->i_nlink >= LINK_MAX) {
+ error = EMLINK;
+ goto abort_withlocks;
+ }
+ if ((ip->i_flags & (IMMUTABLE | APPEND)) ||
+ (fdp->i_flags & APPEND)) {
+ error = EPERM;
+ goto abort_withlocks;
+ }
+ if ((ip->i_mode & IFMT) == IFDIR) {
+ /*
+ * Avoid ".", "..", and aliases of "." for obvious reasons.
+ */
+ if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') ||
+ fdp == ip ||
+ (fcnp->cn_flags & ISDOTDOT) ||
+ (tcnp->cn_flags & ISDOTDOT) ||
+ (ip->i_flag & IN_RENAME)) {
+ error = EINVAL;
+ goto abort_withlocks;
+ }
+ ip->i_flag |= IN_RENAME;
+ doingdirectory = 1;
+ }
+ oldparent = fdp->i_number;
+ VN_KNOTE(fdvp, NOTE_WRITE); /* XXXLUKEM/XXX: right place? */
+
+ /*
+ * Both the directory
+ * and target vnodes are locked.
+ */
+ tdp = VTOI(tdvp);
+ txp = NULL;
+ if (tvp)
+ txp = VTOI(tvp);
+
+ mp = fdvp->v_mount;
+ fstrans_start(mp, FSTRANS_SHARED);
+
+ if (oldparent != tdp->i_number)
+ newparent = tdp->i_number;
+
+ /*
+ * If ".." must be changed (ie the directory gets a new
+ * parent) the user must have write permission in the source
+ * so as to be able to change "..".
+ */
+ if (doingdirectory && newparent) {
+ error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred);
+ if (error)
+ goto out;
+ }
+
+ KASSERT(fdvp != tvp);
+
+ if (newparent) {
+ /* Check for the rename("foo/foo", "foo") case. */
+ if (fdvp == tvp) {
+ error = doingdirectory ? ENOTEMPTY : EISDIR;
+ goto out;
+ }
+ }
+
+ fxp = VTOI(fvp);
+ fdp = VTOI(fdvp);
- error = ufs_rename(ap);
+ error = UFS_WAPBL_BEGIN(fdvp->v_mount);
+ if (error)
+ goto out2;
+
+ /*
+ * 1) Bump link count while we're moving stuff
+ * around. If we crash somewhere before
+ * completing our work, the link count
+ * may be wrong, but correctable.
+ */
+ ip->i_nlink++;
+ DIP_ASSIGN(ip, nlink, ip->i_nlink);
+ ip->i_flag |= IN_CHANGE;
+ if ((error = UFS_UPDATE(fvp, NULL, NULL, UPDATE_DIROP)) != 0) {
+ goto bad;
+ }
+
+ /*
+ * 2) If target doesn't exist, link the target
+ * to the source and unlink the source.
+ * Otherwise, rewrite the target directory
+ * entry to reference the source inode and
+ * expunge the original entry's existence.
+ */
+ if (txp == NULL) {
+ if (tdp->i_dev != ip->i_dev)
+ panic("rename: EXDEV");
+ /*
+ * Account for ".." in new directory.
+ * When source and destination have the same
+ * parent we don't fool with the link count.
+ */
+ if (doingdirectory && newparent) {
+ if ((nlink_t)tdp->i_nlink >= LINK_MAX) {
+ error = EMLINK;
+ goto bad;
+ }
+ tdp->i_nlink++;
+ DIP_ASSIGN(tdp, nlink, tdp->i_nlink);
+ tdp->i_flag |= IN_CHANGE;
+ if ((error = UFS_UPDATE(tdvp, NULL, NULL,
+ UPDATE_DIROP)) != 0) {
+ tdp->i_nlink--;
+ DIP_ASSIGN(tdp, nlink, tdp->i_nlink);
+ tdp->i_flag |= IN_CHANGE;
+ goto bad;
+ }
+ }
+ newdir = pool_cache_get(ufs_direct_cache, PR_WAITOK);
+ ufs_makedirentry(ip, tcnp, newdir);
+ error = ufs_direnter(tdvp, &to_ulr,
+ NULL, newdir, tcnp, NULL);
+ pool_cache_put(ufs_direct_cache, newdir);
+ if (error != 0) {
+ if (doingdirectory && newparent) {
+ tdp->i_nlink--;
+ DIP_ASSIGN(tdp, nlink, tdp->i_nlink);
+ tdp->i_flag |= IN_CHANGE;
+ (void)UFS_UPDATE(tdvp, NULL, NULL,
+ UPDATE_WAIT | UPDATE_DIROP);
+ }
+ goto bad;
+ }
+ VN_KNOTE(tdvp, NOTE_WRITE);
+ } else {
+ if (txp->i_dev != tdp->i_dev || txp->i_dev != ip->i_dev)
+ panic("rename: EXDEV");
+ /*
+ * Short circuit rename(foo, foo).
+ */
+ if (txp->i_number == ip->i_number)
+ panic("rename: same file");
+ /*
+ * If the parent directory is "sticky", then the user must
+ * own the parent directory, or the destination of the rename,
+ * otherwise the destination may not be changed (except by
+ * root). This implements append-only directories.
+ */
+ if ((tdp->i_mode & S_ISTXT) &&
+ kauth_authorize_generic(tcnp->cn_cred,
+ KAUTH_GENERIC_ISSUSER, NULL) != 0 &&
+ kauth_cred_geteuid(tcnp->cn_cred) != tdp->i_uid &&
+ txp->i_uid != kauth_cred_geteuid(tcnp->cn_cred)) {
+ error = EPERM;
+ goto bad;
+ }
+ /*
+ * Target must be empty if a directory and have no links
+ * to it. Also, ensure source and target are compatible
+ * (both directories, or both not directories).
+ */
+ if ((txp->i_mode & IFMT) == IFDIR) {
+ if (txp->i_nlink > 2 ||
+ !ufs_dirempty(txp, tdp->i_number, tcnp->cn_cred)) {
+ error = ENOTEMPTY;
+ goto bad;
+ }
+ if (!doingdirectory) {
+ error = ENOTDIR;
+ goto bad;
+ }
+ cache_purge(tdvp);
+ } else if (doingdirectory) {
+ error = EISDIR;
+ goto bad;
+ }
+ if ((error = ufs_dirrewrite(tdp, to_ulr.ulr_offset,
+ txp, ip->i_number,
+ IFTODT(ip->i_mode), doingdirectory && newparent ?
+ newparent : doingdirectory, IN_CHANGE | IN_UPDATE)) != 0)
+ goto bad;
+ if (doingdirectory) {
+ /*
+ * Truncate inode. The only stuff left in the directory
+ * is "." and "..". The "." reference is inconsequential
+ * since we are quashing it. We have removed the "."
+ * reference and the reference in the parent directory,
+ * but there may be other hard links.
+ */
+ if (!newparent) {
+ tdp->i_nlink--;
+ DIP_ASSIGN(tdp, nlink, tdp->i_nlink);
+ tdp->i_flag |= IN_CHANGE;
+ UFS_WAPBL_UPDATE(tdvp, NULL, NULL, 0);
+ }
+ txp->i_nlink--;
+ DIP_ASSIGN(txp, nlink, txp->i_nlink);
+ txp->i_flag |= IN_CHANGE;
+ if ((error = UFS_TRUNCATE(tvp, (off_t)0, IO_SYNC,
+ tcnp->cn_cred)))
+ goto bad;
+ }
+ VN_KNOTE(tdvp, NOTE_WRITE);
+ VN_KNOTE(tvp, NOTE_DELETE);
+ }
+
+ /*
+ * Handle case where the directory entry we need to remove,
+ * which is/was at from_ulr.ulr_offset, or the one before it,
+ * which is/was at from_ulr.ulr_offset - from_ulr.ulr_count,
+ * may have been moved when the directory insertion above
+ * performed compaction.
+ */
+ if (tdp->i_number == fdp->i_number &&
+ ulr_overlap(&from_ulr, &to_ulr)) {
+
+ struct buf *bp;
+ struct direct *ep;
+ struct ufsmount *ump = fdp->i_ump;
+ doff_t curpos;
+ doff_t endsearch; /* offset to end directory search */
+ uint32_t prev_reclen;
+ int dirblksiz = ump->um_dirblksiz;
+ const int needswap = UFS_MPNEEDSWAP(ump);
+ u_long bmask;
+ int namlen, entryoffsetinblock;
+ char *dirbuf;
+
+ bmask = fdvp->v_mount->mnt_stat.f_iosize - 1;
+
+ /*
+ * The fcnp entry will be somewhere between the start of
+ * compaction (to_ulr.ulr_offset) and the original location
+ * (from_ulr.ulr_offset).
+ */
+ curpos = to_ulr.ulr_offset;
+ endsearch = from_ulr.ulr_offset + from_ulr.ulr_reclen;
+ entryoffsetinblock = 0;
+
+ /*
+ * Get the directory block containing the start of
+ * compaction.
+ */
+ error = ufs_blkatoff(fdvp, (off_t)to_ulr.ulr_offset, &dirbuf,
+ &bp, false);
+ if (error)
+ goto bad;
+
+ /*
+ * Keep existing ulr_count (length of previous record)
+ * for the case where compaction did not include the
+ * previous entry but started at the from-entry.
+ */
+ prev_reclen = from_ulr.ulr_count;
+
+ while (curpos < endsearch) {
+ uint32_t reclen;
+
+ /*
+ * If necessary, get the next directory block.
+ *
+ * dholland 7/13/11 to the best of my understanding
+ * this should never happen; compaction occurs only
+ * within single blocks. I think.
+ */
+ if ((curpos & bmask) == 0) {
+ if (bp != NULL)
+ brelse(bp, 0);
+ error = ufs_blkatoff(fdvp, (off_t)curpos,
+ &dirbuf, &bp, false);
+ if (error)
+ goto bad;
+ entryoffsetinblock = 0;
+ }
+
+ KASSERT(bp != NULL);
+ ep = (struct direct *)(dirbuf + entryoffsetinblock);
+ reclen = ufs_rw16(ep->d_reclen, needswap);
+
+#if (BYTE_ORDER == LITTLE_ENDIAN)
+ if (FSFMT(fdvp) && needswap == 0)
+ namlen = ep->d_type;
+ else
+ namlen = ep->d_namlen;
+#else
+ if (FSFMT(fdvp) && needswap != 0)
+ namlen = ep->d_type;
+ else
+ namlen = ep->d_namlen;
+#endif
+ if ((ep->d_ino != 0) &&
+ (ufs_rw32(ep->d_ino, needswap) != WINO) &&
+ (namlen == fcnp->cn_namelen) &&
+ memcmp(ep->d_name, fcnp->cn_nameptr, namlen) == 0) {
+ from_ulr.ulr_reclen = reclen;
+ break;
+ }
+ curpos += reclen;
+ entryoffsetinblock += reclen;
+ prev_reclen = reclen;
+ }
+
+ from_ulr.ulr_offset = curpos;
+ from_ulr.ulr_count = prev_reclen;
+
+ KASSERT(curpos <= endsearch);
+
+ /*
+ * If ulr_offset points to start of a directory block,
+ * clear ulr_count so ufs_dirremove() doesn't try to
+ * merge free space over a directory block boundary.
+ */
+ if ((from_ulr.ulr_offset & (dirblksiz - 1)) == 0)
+ from_ulr.ulr_count = 0;
+
+ brelse(bp, 0);
+ }
+
+ /*
+ * 3) Unlink the source.
+ */
+
+#if 0
+ /*
+ * Ensure that the directory entry still exists and has not
+ * changed while the new name has been entered. If the source is
+ * a file then the entry may have been unlinked or renamed. In
+ * either case there is no further work to be done. If the source
+ * is a directory then it cannot have been rmdir'ed; The IRENAME
+ * flag ensures that it cannot be moved by another rename or removed
+ * by a rmdir.
+ */
+#endif
+ KASSERT(fxp == ip);
+
+ /*
+ * If the source is a directory with a new parent, the link
+ * count of the old parent directory must be decremented and
+ * ".." set to point to the new parent.
+ */
+ if (doingdirectory && newparent) {
+ KASSERT(fdp != NULL);
+ ufs_dirrewrite(fxp, mastertemplate.dot_reclen,
+ fdp, newparent, DT_DIR, 0, IN_CHANGE);
+ cache_purge(fdvp);
+ }
+ error = ufs_dirremove(fdvp, &from_ulr,
+ fxp, fcnp->cn_flags, 0);
+ fxp->i_flag &= ~IN_RENAME;
+
+ VN_KNOTE(fvp, NOTE_RENAME);
+ goto done;
+
+ out:
+ goto out2;
+
+ /* exit routines from steps 1 & 2 */
+ bad:
+ if (doingdirectory)
+ ip->i_flag &= ~IN_RENAME;
+ ip->i_nlink--;
+ DIP_ASSIGN(ip, nlink, ip->i_nlink);
+ ip->i_flag |= IN_CHANGE;
+ ip->i_flag &= ~IN_RENAME;
+ UFS_WAPBL_UPDATE(fvp, NULL, NULL, 0);
+ done:
+ UFS_WAPBL_END(fdvp->v_mount);
+ out2:
+ /*
+ * clear IN_RENAME - some exit paths happen too early to go
+ * through the cleanup done in the "bad" case above, so we
+ * always do this mini-cleanup here.
+ */
+ ip->i_flag &= ~IN_RENAME;
+
+ VOP_UNLOCK(fdvp);
+ if (tdvp != fdvp) {
+ VOP_UNLOCK(tdvp);
+ }
+ VOP_UNLOCK(fvp);
+ if (tvp && tvp != fvp) {
+ VOP_UNLOCK(tvp);
+ }
+
+ vrele(fdvp);
+ vrele(tdvp);
+ vrele(fvp);
+ if (tvp) {
+ vrele(tvp);
+ }
+
+ fstrans_done(mp);
+ if (marked) {
UNMARK_VNODE(fdvp);
UNMARK_VNODE(fvp);
SET_ENDOP_REMOVE(fs, tdvp, tvp, "rename");
+ }
return (error);
- errout:
- VOP_ABORTOP(tdvp, ap->a_tcnp); /* XXX, why not in NFS? */
- if (tdvp == tvp)
+ abort_withlocks:
+ VOP_UNLOCK(fdvp);
+ if (tdvp != fdvp) {
+ VOP_UNLOCK(tdvp);
+ }
+ VOP_UNLOCK(fvp);
+ if (tvp && tvp != fvp) {
+ VOP_UNLOCK(tvp);
+ }
+
+ abort:
+ VOP_ABORTOP(fdvp, fcnp); /* XXX, why not in NFS? */
+ VOP_ABORTOP(tdvp, tcnp); /* XXX, why not in NFS? */
vrele(tdvp);
- else
- vput(tdvp);
- if (tvp)
- vput(tvp);
- VOP_ABORTOP(fdvp, ap->a_fcnp); /* XXX, why not in NFS? */
+ if (tvp) {
+ vrele(tvp);
+ }
vrele(fdvp);
+ if (fvp) {
vrele(fvp);
+ }
+ if (marked) {
+ UNMARK_VNODE(fdvp);
+ UNMARK_VNODE(fvp);
+ SET_ENDOP_REMOVE(fs, tdvp, tvp, "rename");
+ }
return (error);
}
Index: src/tests/fs/vfs/t_renamerace.c
diff -u src/tests/fs/vfs/t_renamerace.c:1.24 src/tests/fs/vfs/t_renamerace.c:1.24.4.1
--- src/tests/fs/vfs/t_renamerace.c:1.24 Sat Oct 8 13:08:54 2011
+++ src/tests/fs/vfs/t_renamerace.c Sat Mar 17 17:40:08 2012
@@ -1,4 +1,4 @@
-/* $NetBSD: t_renamerace.c,v 1.24 2011/10/08 13:08:54 njoly Exp $ */
+/* $NetBSD: t_renamerace.c,v 1.24.4.1 2012/03/17 17:40:08 bouyer Exp $ */
/*
* Modified for rump and atf from a program supplied
@@ -81,9 +81,6 @@ renamerace(const atf_tc_t *tc, const cha
pthread_t pt1[NWRK], pt2[NWRK];
int i;
- if (FSTYPE_LFS(tc))
- atf_tc_expect_signal(-1, "PR kern/43582");
-
if (FSTYPE_RUMPFS(tc))
atf_tc_skip("rename not supported by file system");
@@ -106,13 +103,6 @@ renamerace(const atf_tc_t *tc, const cha
pthread_join(pt2[i], NULL);
RL(rump_sys_chdir("/"));
- /*
- * XXX: does not always fail on LFS, especially for unicpu
- * configurations. see other ramblings about racy tests.
- */
- if (FSTYPE_LFS(tc))
- abort();
-
if (FSTYPE_MSDOS(tc)) {
atf_tc_expect_fail("PR kern/44661");
/*
@@ -139,7 +129,7 @@ renamerace_dirs(const atf_tc_t *tc, cons
atf_tc_skip("rename not supported by file system");
/* XXX: msdosfs also sometimes hangs */
- if (FSTYPE_EXT2FS(tc) || FSTYPE_LFS(tc) || FSTYPE_MSDOS(tc))
+ if (FSTYPE_EXT2FS(tc) || FSTYPE_MSDOS(tc))
atf_tc_expect_signal(-1, "PR kern/43626");
/* XXX: unracy execution not caught */
@@ -164,7 +154,7 @@ renamerace_dirs(const atf_tc_t *tc, cons
* Doesn't always trigger when run on a slow backend
* (i.e. not on tmpfs/mfs). So do the usual kludge.
*/
- if (FSTYPE_EXT2FS(tc) || FSTYPE_LFS(tc) || FSTYPE_MSDOS(tc))
+ if (FSTYPE_EXT2FS(tc) || FSTYPE_MSDOS(tc))
abort();
if (FSTYPE_P2K_FFS(tc)) {
Index: src/tests/fs/vfs/t_rmdirrace.c
diff -u src/tests/fs/vfs/t_rmdirrace.c:1.8 src/tests/fs/vfs/t_rmdirrace.c:1.8.4.1
--- src/tests/fs/vfs/t_rmdirrace.c:1.8 Sat Oct 8 13:08:54 2011
+++ src/tests/fs/vfs/t_rmdirrace.c Sat Mar 17 17:40:07 2012
@@ -1,4 +1,4 @@
-/* $NetBSD: t_rmdirrace.c,v 1.8 2011/10/08 13:08:54 njoly Exp $ */
+/* $NetBSD: t_rmdirrace.c,v 1.8.4.1 2012/03/17 17:40:07 bouyer Exp $ */
/*-
* Copyright (c) 2010 The NetBSD Foundation, Inc.
@@ -68,8 +68,6 @@ race(const atf_tc_t *tc, const char *pat
int res, fd, quit;
pthread_t th1, th2;
- if (FSTYPE_LFS(tc))
- atf_tc_expect_signal(-1, "PR kern/43582");
if (FSTYPE_SYSVBFS(tc))
atf_tc_skip("directories not supported by file system");
@@ -103,14 +101,6 @@ race(const atf_tc_t *tc, const char *pat
res = rump_sys_fchdir(fd);
if (res == -1)
atf_tc_fail("fchdir failed");
-
- /*
- * Rarely the LFS test does not crash. atf currently has no way of
- * saying "just chill even if the test doesn't fail", so this
- * takes care of it.
- */
- if (FSTYPE_LFS(tc))
- abort();
}
ATF_FSAPPLY(race, "rmdir(2) race");