Module Name:    src
Committed By:   riastradh
Date:           Thu Aug 18 21:42:19 UTC 2011

Modified Files:
        src/sys/fs/tmpfs: tmpfs_vnops.c

Log Message:
Fix tmpfs_rename locking.

Fixes PR kern/36681.  tmpfs now survives dirconc, all our vfs/tmpfs
tests and rename races in atf, and a bunch of hand-written tests
that I'd commit if atf didn't find them highly indigestible.

ok dholland


To generate a diff of this commit:
cvs rdiff -u -r1.88 -r1.89 src/sys/fs/tmpfs/tmpfs_vnops.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/fs/tmpfs/tmpfs_vnops.c
diff -u src/sys/fs/tmpfs/tmpfs_vnops.c:1.88 src/sys/fs/tmpfs/tmpfs_vnops.c:1.89
--- src/sys/fs/tmpfs/tmpfs_vnops.c:1.88	Wed Jul 13 03:28:41 2011
+++ src/sys/fs/tmpfs/tmpfs_vnops.c	Thu Aug 18 21:42:18 2011
@@ -1,4 +1,4 @@
-/*	$NetBSD: tmpfs_vnops.c,v 1.88 2011/07/13 03:28:41 riastradh Exp $	*/
+/*	$NetBSD: tmpfs_vnops.c,v 1.89 2011/08/18 21:42:18 riastradh Exp $	*/
 
 /*
  * Copyright (c) 2005, 2006, 2007 The NetBSD Foundation, Inc.
@@ -35,7 +35,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: tmpfs_vnops.c,v 1.88 2011/07/13 03:28:41 riastradh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: tmpfs_vnops.c,v 1.89 2011/08/18 21:42:18 riastradh Exp $");
 
 #include <sys/param.h>
 #include <sys/dirent.h>
@@ -798,27 +798,8 @@
 }
 
 /*
- * tmpfs_parentcheck_p: check if 'lower' is a descendent of 'upper'.
- *
- * => Returns 'true' if parent, and 'false' otherwise.
- */
-static inline bool
-tmpfs_parentcheck_p(tmpfs_node_t *lower, tmpfs_node_t *upper)
-{
-	tmpfs_node_t *un = lower;
-
-	while (un != un->tn_spec.tn_dir.tn_parent) {
-		KASSERT(un->tn_type == VDIR);
-		if (un == upper) {
-			return true;
-		}
-		un = un->tn_spec.tn_dir.tn_parent;
-	}
-	return false;
-}
-
-/*
- * tmpfs_rename: rename routine.
+ * tmpfs_rename: rename routine, the hairiest system call, with the
+ * insane API.
  *
  * Arguments: fdvp (from-parent vnode), fvp (from-leaf), tdvp (to-parent)
  * and tvp (to-leaf), if exists (NULL if not).
@@ -829,6 +810,57 @@
  * => Both tdvp and tvp are referenced and locked.  It is our responsibility
  *    to release the references and unlock them (or destroy).
  */
+
+/*
+ * First, some forward declarations of subroutines.
+ */
+
+static int tmpfs_sane_rename(struct vnode *, struct componentname *,
+    struct vnode *, struct componentname *, kauth_cred_t, bool);
+static int tmpfs_rename_enter(struct mount *, struct tmpfs_mount *,
+    kauth_cred_t,
+    struct vnode *, struct tmpfs_node *, struct componentname *,
+    struct tmpfs_dirent **, struct vnode **,
+    struct vnode *, struct tmpfs_node *, struct componentname *,
+    struct tmpfs_dirent **, struct vnode **);
+static int tmpfs_rename_enter_common(struct mount *, struct tmpfs_mount *,
+    kauth_cred_t,
+    struct vnode *, struct tmpfs_node *,
+    struct componentname *, struct tmpfs_dirent **, struct vnode **,
+    struct componentname *, struct tmpfs_dirent **, struct vnode **);
+static int tmpfs_rename_enter_separate(struct mount *, struct tmpfs_mount *,
+    kauth_cred_t,
+    struct vnode *, struct tmpfs_node *, struct componentname *,
+    struct tmpfs_dirent **, struct vnode **,
+    struct vnode *, struct tmpfs_node *, struct componentname *,
+    struct tmpfs_dirent **, struct vnode **);
+static void tmpfs_rename_exit(struct tmpfs_mount *,
+    struct vnode *, struct vnode *, struct vnode *, struct vnode *);
+static int tmpfs_rename_lock_directory(struct vnode *, struct tmpfs_node *);
+static int tmpfs_rename_genealogy(struct tmpfs_node *, struct tmpfs_node *,
+    struct tmpfs_node **);
+static int tmpfs_rename_lock(struct mount *, kauth_cred_t, int,
+    struct vnode *, struct tmpfs_node *, struct componentname *, bool,
+    struct tmpfs_dirent **, struct vnode **,
+    struct vnode *, struct tmpfs_node *, struct componentname *, bool,
+    struct tmpfs_dirent **, struct vnode **);
+static void tmpfs_rename_attachdetach(struct tmpfs_mount *,
+    struct vnode *, struct tmpfs_dirent *, struct vnode *,
+    struct vnode *, struct tmpfs_dirent *, struct vnode *);
+static int tmpfs_do_remove(struct tmpfs_mount *, struct vnode *,
+    struct tmpfs_node *, struct tmpfs_dirent *, struct vnode *, kauth_cred_t);
+static int tmpfs_rename_check_possible(struct tmpfs_node *,
+    struct tmpfs_node *, struct tmpfs_node *, struct tmpfs_node *);
+static int tmpfs_rename_check_permitted(kauth_cred_t,
+    struct tmpfs_node *, struct tmpfs_node *,
+    struct tmpfs_node *, struct tmpfs_node *);
+static int tmpfs_remove_check_possible(struct tmpfs_node *,
+    struct tmpfs_node *);
+static int tmpfs_remove_check_permitted(kauth_cred_t,
+    struct tmpfs_node *, struct tmpfs_node *);
+static int tmpfs_check_sticky(kauth_cred_t,
+    struct tmpfs_node *, struct tmpfs_node *);
+
 int
 tmpfs_rename(void *v)
 {
@@ -840,198 +872,1196 @@
 		struct vnode		*a_tvp;
 		struct componentname	*a_tcnp;
 	} */ *ap = v;
-	vnode_t *fdvp = ap->a_fdvp;
-	vnode_t *fvp = ap->a_fvp;
+	struct vnode *fdvp = ap->a_fdvp;
+	struct vnode *fvp = ap->a_fvp;
 	struct componentname *fcnp = ap->a_fcnp;
-	vnode_t *tdvp = ap->a_tdvp;
-	vnode_t *tvp = ap->a_tvp;
+	struct vnode *tdvp = ap->a_tdvp;
+	struct vnode *tvp = ap->a_tvp;
 	struct componentname *tcnp = ap->a_tcnp;
-	tmpfs_node_t *fdnode, *fnode, *tnode, *tdnode;
-	tmpfs_dirent_t *de;
-	tmpfs_mount_t *tmp;
-	size_t namelen;
-	char *newname;
+	kauth_cred_t cred;
 	int error;
 
-	KASSERT(VOP_ISLOCKED(tdvp));
-	KASSERT(tvp == NULL || VOP_ISLOCKED(tvp) == LK_EXCLUSIVE);
-	KASSERT((fcnp->cn_flags & ISDOTDOT) == 0);
-	KASSERT((tcnp->cn_flags & ISDOTDOT) == 0);
+	KASSERT(fdvp != NULL);
+	KASSERT(fvp != NULL);
+	KASSERT(fcnp != NULL);
+	KASSERT(fcnp->cn_nameptr != NULL);
+	KASSERT(tdvp != NULL);
+	KASSERT(tcnp != NULL);
+	KASSERT(tcnp->cn_nameptr != NULL);
+	/* KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */
+	/* KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */
+	KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
+	KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE));
+	KASSERT(fdvp->v_type == VDIR);
+	KASSERT(tdvp->v_type == VDIR);
 
-	newname = NULL;
-	namelen = 0;
-	tmp = NULL;
-
-	/* Disallow cross-device renames. */
-	if (fvp->v_mount != tdvp->v_mount ||
-	    (tvp != NULL && fvp->v_mount != tvp->v_mount)) {
-		error = EXDEV;
-		goto out_unlocked;
-	}
+	cred = fcnp->cn_cred;
+	KASSERT(tcnp->cn_cred == cred);
+
+	/*
+	 * Sanitize our world from the VFS insanity.  Unlock the target
+	 * directory and node, which are locked.  Release the children,
+	 * which are referenced.  Check for rename("x", "y/."), which
+	 * it is our responsibility to reject, not the caller's.  (But
+	 * the caller does reject rename("x/.", "y").  Go figure.)
+	 */
 
-	fnode = VP_TO_TMPFS_NODE(fvp);
-	fdnode = VP_TO_TMPFS_DIR(fdvp);
-	tnode = (tvp == NULL) ? NULL : VP_TO_TMPFS_NODE(tvp);
-	tdnode = VP_TO_TMPFS_DIR(tdvp);
-	tmp = VFS_TO_TMPFS(tdvp->v_mount);
+	VOP_UNLOCK(tdvp);
+	if ((tvp != NULL) && (tvp != tdvp))
+		VOP_UNLOCK(tvp);
 
-	if (fdvp == tvp) {
-		error = 0;
-		goto out_unlocked;
+	vrele(fvp);
+	if (tvp != NULL)
+		vrele(tvp);
+
+	if (tvp == tdvp) {
+		error = EINVAL;
+		goto out;
 	}
 
-	/* Allocate memory, if necessary, for a new name. */
-	namelen = tcnp->cn_namelen;
+	error = tmpfs_sane_rename(fdvp, fcnp, tdvp, tcnp, cred, false);
+
+out:	/*
+	 * All done, whether with success or failure.  Release the
+	 * directory nodes now, as the caller expects from the VFS
+	 * protocol.
+	 */
+	vrele(fdvp);
+	vrele(tdvp);
+
+	return error;
+}
+
+/*
+ * tmpfs_sane_rename: rename routine, the hairiest system call, with
+ * the sane API.
+ *
+ * Arguments:
+ *
+ * . fdvp (from directory vnode),
+ * . fcnp (from component name),
+ * . tdvp (to directory vnode), and
+ * . tcnp (to component name).
+ *
+ * fdvp and tdvp must be referenced and unlocked.
+ */
+static int
+tmpfs_sane_rename(struct vnode *fdvp, struct componentname *fcnp,
+    struct vnode *tdvp, struct componentname *tcnp, kauth_cred_t cred,
+    bool posixly_correct)
+{
+	struct mount *mount;
+	struct tmpfs_mount *tmpfs;
+	struct tmpfs_node *fdnode, *tdnode;
+	struct tmpfs_dirent *fde, *tde;
+	struct vnode *fvp, *tvp;
+	char *newname;
+	int error;
+
+	KASSERT(fdvp != NULL);
+	KASSERT(fcnp != NULL);
+	KASSERT(tdvp != NULL);
+	KASSERT(tcnp != NULL);
+	/* KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */
+	/* KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */
+	KASSERT(fdvp->v_type == VDIR);
+	KASSERT(tdvp->v_type == VDIR);
+	KASSERT(fdvp->v_mount == tdvp->v_mount);
+	KASSERT((fcnp->cn_flags & ISDOTDOT) == 0);
+	KASSERT((tcnp->cn_flags & ISDOTDOT) == 0);
+	KASSERT((fcnp->cn_namelen != 1) || (fcnp->cn_nameptr[0] != '.'));
+	KASSERT((tcnp->cn_namelen != 1) || (tcnp->cn_nameptr[0] != '.'));
+	KASSERT((fcnp->cn_namelen != 2) || (fcnp->cn_nameptr[0] != '.') ||
+	    (fcnp->cn_nameptr[1] != '.'));
+	KASSERT((tcnp->cn_namelen != 2) || (tcnp->cn_nameptr[0] != '.') ||
+	    (tcnp->cn_nameptr[1] != '.'));
+
+	/*
+	 * Pull out the tmpfs data structures.
+	 */
+	fdnode = VP_TO_TMPFS_NODE(fdvp);
+	tdnode = VP_TO_TMPFS_NODE(tdvp);
+	KASSERT(fdnode != NULL);
+	KASSERT(tdnode != NULL);
+	KASSERT(fdnode->tn_vnode == fdvp);
+	KASSERT(tdnode->tn_vnode == tdvp);
+	KASSERT(fdnode->tn_type == VDIR);
+	KASSERT(tdnode->tn_type == VDIR);
+
+	mount = fdvp->v_mount;
+	KASSERT(mount != NULL);
+	KASSERT(mount == tdvp->v_mount);
+	/* XXX How can we be sure this stays true?  (Not that you're
+	 * likely to mount a tmpfs read-only...)  */
+	KASSERT((mount->mnt_flag & MNT_RDONLY) == 0);
+	tmpfs = VFS_TO_TMPFS(mount);
+	KASSERT(tmpfs != NULL);
+
+	/*
+	 * Decide whether we need a new name, and allocate memory for
+	 * it if so.  Do this before locking anything or taking
+	 * destructive actions so that we can back out safely and sleep
+	 * safely.  XXX Is sleeping an issue here?  Can this just be
+	 * moved into tmpfs_rename_attachdetach?
+	 */
 	if (tmpfs_strname_neqlen(fcnp, tcnp)) {
-		newname = tmpfs_strname_alloc(tmp, namelen);
+		newname = tmpfs_strname_alloc(tmpfs, tcnp->cn_namelen);
 		if (newname == NULL) {
 			error = ENOSPC;
 			goto out_unlocked;
 		}
+	} else {
+		newname = NULL;
 	}
 
-	/* XXX: Lock order violation! */
-	if (fdvp != tdvp) {
-		vn_lock(fdvp, LK_EXCLUSIVE | LK_RETRY);
-	}
-	if (fvp != tvp) {
-		vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY);
-	}
+	/*
+	 * Lock and look up everything.  GCC is not very clever.
+	 */
+	fde = tde = NULL;
+	fvp = tvp = NULL;
+	error = tmpfs_rename_enter(mount, tmpfs, cred,
+	    fdvp, fdnode, fcnp, &fde, &fvp,
+	    tdvp, tdnode, tcnp, &tde, &tvp);
+	if (error)
+		goto out_unlocked;
 
-	/* If the inode we were renaming has scarpered, just give up. */
-	de = tmpfs_dir_lookup(fdnode, fcnp);
-	if (de == NULL || de->td_node != fnode) {
-		error = ENOENT;
-		goto out;
-	}
+	/*
+	 * Check that everything is locked and looks right.
+	 */
+	KASSERT(fde != NULL);
+	KASSERT(fvp != NULL);
+	KASSERT(fde->td_node != NULL);
+	KASSERT(fde->td_node->tn_vnode == fvp);
+	KASSERT(fde->td_node->tn_type == fvp->v_type);
+	KASSERT((tde == NULL) == (tvp == NULL));
+	KASSERT((tde == NULL) || (tde->td_node != NULL));
+	KASSERT((tde == NULL) || (tde->td_node->tn_vnode == tvp));
+	KASSERT((tde == NULL) || (tde->td_node->tn_type == tvp->v_type));
+	KASSERT(VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE);
+	KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
+	KASSERT(VOP_ISLOCKED(fvp) == LK_EXCLUSIVE);
+	KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE));
 
 	/*
-	 * If source and target is the same vnode - it is either invalid
-	 * rename of a directory, or a hard link.  Remove the source link,
-	 * if the later.
+	 * If the source and destination are the same object, at most
+	 * we need to delete the source entry.
 	 */
 	if (fvp == tvp) {
-		if (fvp->v_type == VDIR) {
+		KASSERT(tvp != NULL);
+		if (fde->td_node->tn_type == VDIR) {
+			/* XXX How can this possibly happen?  */
 			error = EINVAL;
-			goto out;
+			goto out_locked;
 		}
-		/*
-		 * Detach and free the directory entry.  Drops the link
-		 * count on the inode.
-		 */
-		KASSERT(fnode == tnode);
-		tmpfs_dir_detach(fdvp, de);
-		tmpfs_free_dirent(tmp, de);
-		goto out_ok;
+		if (!posixly_correct && (fde != tde)) {
+			/* XXX Doesn't work because of locking.
+			 * error = VOP_REMOVE(fdvp, fvp);
+			 */
+			error = tmpfs_do_remove(tmpfs, fdvp, fdnode, fde, fvp,
+			    cred);
+			if (error)
+				goto out_locked;
+		}
+		goto success;
 	}
+	KASSERT(fde != tde);
+	KASSERT(fvp != tvp);
 
-	/* If replacing an existing entry, ensure we can do the operation. */
+	/*
+	 * If the target exists, refuse to rename a directory over a
+	 * non-directory or vice versa, or to clobber a non-empty
+	 * directory.
+	 */
 	if (tvp != NULL) {
-		KASSERT(tnode != NULL);
-		if (fnode->tn_type == VDIR && tnode->tn_type == VDIR) {
-			if (tnode->tn_size > 0) {
-				error = ENOTEMPTY;
-				goto out;
-			}
-		} else if (fnode->tn_type == VDIR && tnode->tn_type != VDIR) {
+		KASSERT(tde != NULL);
+		KASSERT(tde->td_node != NULL);
+		if (fvp->v_type == VDIR && tvp->v_type == VDIR)
+			error = ((tde->td_node->tn_size > 0)? ENOTEMPTY : 0);
+		else if (fvp->v_type == VDIR && tvp->v_type != VDIR)
 			error = ENOTDIR;
-			goto out;
-		} else if (fnode->tn_type != VDIR && tnode->tn_type == VDIR) {
+		else if (fvp->v_type != VDIR && tvp->v_type == VDIR)
 			error = EISDIR;
-			goto out;
+		else
+			error = 0;
+		if (error)
+			goto out_locked;
+		KASSERT((fvp->v_type == VDIR) == (tvp->v_type == VDIR));
+	}
+
+	/*
+	 * Authorize the rename.
+	 */
+	error = tmpfs_rename_check_possible(fdnode, fde->td_node,
+	    tdnode, (tde? tde->td_node : NULL));
+	if (error)
+		goto out_locked;
+	error = tmpfs_rename_check_permitted(cred, fdnode, fde->td_node,
+	    tdnode, (tde? tde->td_node : NULL));
+	error = kauth_authorize_vnode(cred, KAUTH_VNODE_DELETE, fvp, fdvp,
+	    error);
+	error = kauth_authorize_vnode(cred, KAUTH_VNODE_RENAME, tvp, tdvp,
+	    error);
+	if (error)
+		goto out_locked;
+
+	/*
+	 * Everything is hunky-dory.  Shuffle the directory entries.
+	 */
+	tmpfs_rename_attachdetach(tmpfs, fdvp, fde, fvp, tdvp, tde, tvp);
+
+	/*
+	 * Update the directory entry's name if necessary, and flag
+	 * metadata updates.  A memory allocation failure here is not
+	 * OK because we've already committed some changes that we
+	 * can't back out at this point, and we have things locked so
+	 * we can't sleep, hence the early allocation above.
+	 */
+	if (newname != NULL) {
+		KASSERT(tcnp->cn_namelen <= MAXNAMLEN);
+
+		tmpfs_strname_free(tmpfs, fde->td_name, fde->td_namelen);
+		fde->td_namelen = (uint16_t)tcnp->cn_namelen;
+		(void)memcpy(newname, tcnp->cn_nameptr, tcnp->cn_namelen);
+		/* Commit newname and don't free it on the way out.  */
+		fde->td_name = newname;
+		newname = NULL;
+
+		fde->td_node->tn_status |= TMPFS_NODE_CHANGED;
+		tdnode->tn_status |= TMPFS_NODE_MODIFIED;
+	}
+
+success:
+	VN_KNOTE(fvp, NOTE_RENAME);
+	error = 0;
+
+out_locked:
+	tmpfs_rename_exit(tmpfs, fdvp, fvp, tdvp, tvp);
+
+out_unlocked:
+	/* KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */
+	/* KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */
+	/* KASSERT((fvp == NULL) || (VOP_ISLOCKED(fvp) != LK_EXCLUSIVE)); */
+	/* KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */
+
+	if (newname != NULL)
+		tmpfs_strname_free(tmpfs, newname, tcnp->cn_namelen);
+
+	return error;
+}
+
+/*
+ * Look up fcnp in fdnode/fdvp and store its directory entry in fde_ret
+ * and the associated vnode in fvp_ret; fail if not found.  Look up
+ * tcnp in tdnode/tdvp and store its directory entry in tde_ret and the
+ * associated vnode in tvp_ret; store null instead if not found.  Fail
+ * if anything has been mounted on any of the nodes involved.
+ *
+ * fdvp and tdvp must be referenced.
+ *
+ * On entry, nothing is locked.
+ *
+ * On success, everything is locked, and *fvp_ret, and *tvp_ret if
+ * nonnull, are referenced.  The only pairs of vnodes that may be
+ * identical are {fdvp, tdvp} and {fvp, tvp}.
+ *
+ * On failure, everything remains as was.
+ *
+ * Locking everything including the source and target nodes is
+ * necessary to make sure that, e.g., link count updates are OK.  The
+ * locking order is, in general, ancestor-first, matching the order you
+ * need to use to look up a descendant anyway.
+ */
+static int
+tmpfs_rename_enter(struct mount *mount, struct tmpfs_mount *tmpfs,
+    kauth_cred_t cred,
+    struct vnode *fdvp, struct tmpfs_node *fdnode, struct componentname *fcnp,
+    struct tmpfs_dirent **fde_ret, struct vnode **fvp_ret,
+    struct vnode *tdvp, struct tmpfs_node *tdnode, struct componentname *tcnp,
+    struct tmpfs_dirent **tde_ret, struct vnode **tvp_ret)
+{
+	int error;
+
+	KASSERT(mount != NULL);
+	KASSERT(tmpfs != NULL);
+	KASSERT(fdvp != NULL);
+	KASSERT(fdnode != NULL);
+	KASSERT(fcnp != NULL);
+	KASSERT(fde_ret != NULL);
+	KASSERT(fvp_ret != NULL);
+	KASSERT(tdvp != NULL);
+	KASSERT(tdnode != NULL);
+	KASSERT(tcnp != NULL);
+	KASSERT(tde_ret != NULL);
+	KASSERT(tvp_ret != NULL);
+	KASSERT(fdnode->tn_vnode == fdvp);
+	KASSERT(tdnode->tn_vnode == tdvp);
+	KASSERT(fdnode->tn_type == VDIR);
+	KASSERT(tdnode->tn_type == VDIR);
+
+	if (fdvp == tdvp) {
+		KASSERT(fdnode == tdnode);
+		error = tmpfs_rename_enter_common(mount, tmpfs, cred, fdvp,
+		    fdnode, fcnp, fde_ret, fvp_ret, tcnp, tde_ret, tvp_ret);
+	} else {
+		KASSERT(fdnode != tdnode);
+		error = tmpfs_rename_enter_separate(mount, tmpfs, cred,
+		    fdvp, fdnode, fcnp, fde_ret, fvp_ret,
+		    tdvp, tdnode, tcnp, tde_ret, tvp_ret);
+	}
+
+	if (error)
+		return error;
+
+	KASSERT(*fde_ret != NULL);
+	KASSERT(*fvp_ret != NULL);
+	KASSERT((*tde_ret == NULL) == (*tvp_ret == NULL));
+	KASSERT((*tde_ret == NULL) || ((*tde_ret)->td_node != NULL));
+	KASSERT((*tde_ret == NULL) ||
+	    ((*tde_ret)->td_node->tn_vnode == *tvp_ret));
+	KASSERT(VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE);
+	KASSERT(VOP_ISLOCKED(*fvp_ret) == LK_EXCLUSIVE);
+	KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
+	KASSERT((*tvp_ret == NULL) ||
+	    (VOP_ISLOCKED(*tvp_ret) == LK_EXCLUSIVE));
+	KASSERT(*fvp_ret != fdvp);
+	KASSERT(*fvp_ret != tdvp);
+	KASSERT(*tvp_ret != fdvp);
+	KASSERT(*tvp_ret != tdvp);
+	return 0;
+}
+
+/*
+ * Lock and look up with a common source/target directory.
+ */
+static int
+tmpfs_rename_enter_common(struct mount *mount, struct tmpfs_mount *tmpfs,
+    kauth_cred_t cred,
+    struct vnode *dvp, struct tmpfs_node *dnode,
+    struct componentname *fcnp,
+    struct tmpfs_dirent **fde_ret, struct vnode **fvp_ret,
+    struct componentname *tcnp,
+    struct tmpfs_dirent **tde_ret, struct vnode **tvp_ret)
+{
+	struct tmpfs_dirent *fde, *tde;
+	struct vnode *fvp, *tvp;
+	int error;
+
+	error = tmpfs_rename_lock_directory(dvp, dnode);
+	if (error)
+		goto fail0;
+
+	/* Did we lose a race with mount?  */
+	if (dvp->v_mountedhere != NULL) {
+		error = EBUSY;
+		goto fail1;
+	}
+
+	/* Make sure the caller may search the directory.  */
+	error = VOP_ACCESS(dvp, VEXEC, cred);
+	if (error)
+		goto fail1;
+
+	/*
+	 * The order in which we lock the source and target nodes is
+	 * irrelevant because there can only be one rename on this
+	 * directory in flight at a time, and we have it locked.
+	 */
+
+	fde = tmpfs_dir_lookup(dnode, fcnp);
+	if (fde == NULL) {
+		error = ENOENT;
+		goto fail1;
+	}
+
+	KASSERT(fde->td_node != NULL);
+	/* We ruled out `.' earlier.  */
+	KASSERT(fde->td_node != dnode);
+	/* We ruled out `..' earlier.  */
+	KASSERT(fde->td_node != dnode->tn_spec.tn_dir.tn_parent);
+	mutex_enter(&fde->td_node->tn_vlock);
+	error = tmpfs_vnode_get(mount, fde->td_node, &fvp);
+	if (error)
+		goto fail1;
+	KASSERT(fvp != NULL);
+	KASSERT(VOP_ISLOCKED(fvp) == LK_EXCLUSIVE);
+	KASSERT(fvp != dvp);
+	KASSERT(fvp->v_mount == mount);
+
+	/* Refuse to rename a mount point.  */
+	if ((fvp->v_type == VDIR) && (fvp->v_mountedhere != NULL)) {
+		error = EBUSY;
+		goto fail2;
+	}
+
+	tde = tmpfs_dir_lookup(dnode, tcnp);
+	if (tde == NULL) {
+		tvp = NULL;
+	} else {
+		KASSERT(tde->td_node != NULL);
+		/* We ruled out `.' earlier.  */
+		KASSERT(tde->td_node != dnode);
+		/* We ruled out `..' earlier.  */
+		KASSERT(tde->td_node != dnode->tn_spec.tn_dir.tn_parent);
+		if (tde->td_node != fde->td_node) {
+			mutex_enter(&tde->td_node->tn_vlock);
+			error = tmpfs_vnode_get(mount, tde->td_node, &tvp);
+			if (error)
+				goto fail2;
+			KASSERT(tvp->v_mount == mount);
+			/* Refuse to rename over a mount point.  */
+			if ((tvp->v_type == VDIR) &&
+			    (tvp->v_mountedhere != NULL)) {
+				error = EBUSY;
+				goto fail3;
+			}
 		} else {
-			KASSERT(fnode->tn_type != VDIR);
-			KASSERT(tnode->tn_type != VDIR);
+			tvp = fvp;
+			vref(tvp);
 		}
+		KASSERT(tvp != NULL);
+		KASSERT(VOP_ISLOCKED(tvp) == LK_EXCLUSIVE);
 	}
+	KASSERT(tvp != dvp);
 
-	/* Are we moving the inode to a different directory? */
-	if (fdvp != tdvp) {
-		/*
-		 * If we are moving a directory - ensure that it is not
-		 * parent of a target directory.  Otherwise, it would
-		 * result in stale nodes.
-		 */
-		if (fnode->tn_type == VDIR && 
-		    tmpfs_parentcheck_p(tdnode, fnode)) {
-			error = EINVAL;
-			goto out;
-		}
+	*fde_ret = fde;
+	*fvp_ret = fvp;
+	*tde_ret = tde;
+	*tvp_ret = tvp;
+	return 0;
 
-		/*
-		 * Perform the move: detach from the source directory and
-		 * attach into the target directory.
-		 */
-		tmpfs_dir_detach(fdvp, de);
-		tmpfs_dir_attach(tdvp, de, fnode);
+fail3:	if (tvp != NULL) {
+		if (tvp != fvp)
+			vput(tvp);
+		else
+			vrele(tvp);
+	}
 
-	} else if (tvp == NULL) {
-		/* Trigger the event, if not overwriting. */
-		VN_KNOTE(tdvp, NOTE_WRITE);
+fail2:	vput(fvp);
+fail1:	VOP_UNLOCK(dvp);
+fail0:	return error;
+}
+
+/*
+ * Lock and look up with separate source and target directories.
+ */
+static int
+tmpfs_rename_enter_separate(struct mount *mount, struct tmpfs_mount *tmpfs,
+    kauth_cred_t cred,
+    struct vnode *fdvp, struct tmpfs_node *fdnode, struct componentname *fcnp,
+    struct tmpfs_dirent **fde_ret, struct vnode **fvp_ret,
+    struct vnode *tdvp, struct tmpfs_node *tdnode, struct componentname *tcnp,
+    struct tmpfs_dirent **tde_ret, struct vnode **tvp_ret)
+{
+	struct tmpfs_node *intermediate_node;
+	struct tmpfs_dirent *fde, *tde;
+	struct vnode *fvp, *tvp;
+	int error;
+
+	KASSERT(fdvp != tdvp);
+	KASSERT(fdnode != tdnode);
+
+#if 0				/* XXX */
+	mutex_enter(&tmpfs->tm_rename_lock);
+#endif
+
+	error = tmpfs_rename_genealogy(fdnode, tdnode, &intermediate_node);
+	if (error)
+		goto fail;
+
+	/*
+	 * intermediate_node == NULL means fdnode is not an ancestor of
+	 * tdnode.
+	 */
+	if (intermediate_node == NULL)
+		error = tmpfs_rename_lock(mount, cred, ENOTEMPTY,
+		    tdvp, tdnode, tcnp, true, &tde, &tvp,
+		    fdvp, fdnode, fcnp, false, &fde, &fvp);
+	else
+		error = tmpfs_rename_lock(mount, cred, EINVAL,
+		    fdvp, fdnode, fcnp, false, &fde, &fvp,
+		    tdvp, tdnode, tcnp, true, &tde, &tvp);
+	if (error)
+		goto fail;
+
+	KASSERT(fde != NULL);
+	KASSERT(fde->td_node != NULL);
+
+	/*
+	 * Reject rename("foo/bar", "foo/bar/baz/quux/zot").
+	 */
+	if (fde->td_node == intermediate_node) {
+		tmpfs_rename_exit(tmpfs, fdvp, fvp, tdvp, tvp);
+		return EINVAL;
 	}
 
-	/* Are we overwriting the entry? */
+	*fde_ret = fde;
+	*fvp_ret = fvp;
+	*tde_ret = tde;
+	*tvp_ret = tvp;
+	return 0;
+
+fail:
+#if 0				/* XXX */
+	mutex_exit(&tmpfs->tm_rename_lock);
+#endif
+	return error;
+}
+
+/*
+ * Unlock everything we locked for rename.
+ *
+ * fdvp and tdvp must be referenced.
+ *
+ * On entry, everything is locked, and fvp and tvp referenced.
+ *
+ * On exit, everything is unlocked, and fvp and tvp are released.
+ */
+static void
+tmpfs_rename_exit(struct tmpfs_mount *tmpfs,
+    struct vnode *fdvp, struct vnode *fvp,
+    struct vnode *tdvp, struct vnode *tvp)
+{
+
+	KASSERT(tmpfs != NULL);
+	KASSERT(fdvp != NULL);
+	KASSERT(fvp != NULL);
+	KASSERT(fdvp != fvp);
+	KASSERT(fdvp != tvp);
+	KASSERT(tdvp != tvp);
+	KASSERT(tdvp != fvp);
+	KASSERT(VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE);
+	KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
+	KASSERT(VOP_ISLOCKED(fvp) == LK_EXCLUSIVE);
+	KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE));
+
 	if (tvp != NULL) {
-		tmpfs_dirent_t *tde;
+		if (tvp != fvp)
+			vput(tvp);
+		else
+			vrele(tvp);
+	}
+	VOP_UNLOCK(tdvp);
+	vput(fvp);
+	if (fdvp != tdvp)
+		VOP_UNLOCK(fdvp);
+
+#if 0				/* XXX */
+	if (fdvp != tdvp)
+		mutex_exit(&tmpfs->tm_rename_lock);
+#endif
+}
+
+/*
+ * Lock a directory, but fail if it has been rmdir'd.
+ *
+ * vp must be referenced.
+ */
+static int
+tmpfs_rename_lock_directory(struct vnode *vp, struct tmpfs_node *node)
+{
+
+	KASSERT(vp != NULL);
+	KASSERT(node != NULL);
+	KASSERT(node->tn_vnode == vp);
+	KASSERT(node->tn_type == VDIR);
 
-		tde = tmpfs_dir_cached(tnode);
-		if (tde == NULL) {
-			tde = tmpfs_dir_lookup(tdnode, tcnp);
+	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
+	if (node->tn_spec.tn_dir.tn_parent == NULL) {
+		VOP_UNLOCK(vp);
+		return ENOENT;
+	}
+
+	return 0;
+}
+
+/*
+ * Analyze the genealogy of the source and target nodes.
+ *
+ * On success, stores in *intermediate_node_ret either the child of
+ * fdnode of which tdnode is a descendant, or null if tdnode is not a
+ * descendant of fdnode at all.
+ *
+ * fdnode and tdnode must be unlocked and referenced.  The file
+ * system's rename lock must also be held, to exclude concurrent
+ * changes to the file system's genealogy other than rmdir.
+ *
+ * XXX This causes an extra lock/unlock of tdnode in the case when
+ * we're just about to lock it again before locking anything else.
+ * However, changing that requires reorganizing the code to make it
+ * even more horrifically obscure.
+ */
+static int
+tmpfs_rename_genealogy(struct tmpfs_node *fdnode, struct tmpfs_node *tdnode,
+    struct tmpfs_node **intermediate_node_ret)
+{
+	struct tmpfs_node *node = tdnode, *parent;
+	int error;
+
+	KASSERT(fdnode != NULL);
+	KASSERT(tdnode != NULL);
+	KASSERT(fdnode != tdnode);
+	KASSERT(intermediate_node_ret != NULL);
+
+	KASSERT(fdnode->tn_vnode != NULL);
+	KASSERT(tdnode->tn_vnode != NULL);
+	KASSERT(fdnode->tn_type == VDIR);
+	KASSERT(tdnode->tn_type == VDIR);
+
+	/*
+	 * We need to provisionally lock tdnode->tn_vnode to keep rmdir
+	 * from deleting it -- or any ancestor -- at an inopportune
+	 * moment.
+	 */
+	error = tmpfs_rename_lock_directory(tdnode->tn_vnode, tdnode);
+	if (error)
+		return error;
+
+	for (;;) {
+		parent = node->tn_spec.tn_dir.tn_parent;
+		KASSERT(parent != NULL);
+		KASSERT(parent->tn_type == VDIR);
+
+		/* Did we hit the root without finding fdnode?  */
+		if (parent == node) {
+			*intermediate_node_ret = NULL;
+			break;
 		}
-		KASSERT(tde && tde->td_node == tnode);
-		KASSERT((tnode->tn_type == VDIR) == (fnode->tn_type == VDIR));
 
-		/*
-		 * Remove and destroy the directory entry on the target
-		 * directory, since we overwrite it.
-		 */
-		tmpfs_dir_detach(tdvp, tde);
-		tmpfs_free_dirent(tmp, tde);
+		/* Did we find that fdnode is an ancestor?  */
+		if (parent == fdnode) {
+			*intermediate_node_ret = node;
+			break;
+		}
+
+		/* Neither -- keep ascending the family tree.  */
+		node = parent;
 	}
 
-	/* If the name has changed, update directory entry. */
-	if (newname != NULL) {
-		KASSERT(tcnp->cn_namelen < MAXNAMLEN);
+	VOP_UNLOCK(tdnode->tn_vnode);
+	return 0;
+}
 
-		tmpfs_strname_free(tmp, de->td_name, de->td_namelen);
-		de->td_namelen = (uint16_t)namelen;
-		memcpy(newname, tcnp->cn_nameptr, namelen);
-		de->td_name = newname;
-		newname = NULL;
+/*
+ * Lock directories a and b, which must be distinct, and look up and
+ * lock nodes a and b.  Do a first and then b.  Directory b may not be
+ * an ancestor of directory a, although directory a may be an ancestor
+ * of directory b.  Fail with overlap_error if node a is directory b.
+ * Neither componentname may be `.' or `..'.
+ *
+ * a_dvp and b_dvp must be referenced.
+ *
+ * On entry, a_dvp and b_dvp are unlocked.
+ *
+ * On success,
+ * . a_dvp and b_dvp are locked,
+ * . *a_dirent_ret is filled with a directory entry whose node is
+ *     locked and referenced,
+ * . *a_vp_ret is filled with the corresponding vnode,
+ * . *b_dirent_ret is filled either with null or with a directory entry
+ *     whose node is locked and referenced,
+ * . *b_vp_ret is filled either with null or with the corresponding vnode,
+ *     and
+ * . the only pair of vnodes that may be identical is a_vp and b_vp.
+ *
+ * On failure, a_dvp and b_dvp are left unlocked, and *a_dirent_ret,
+ * *a_vp_ret, *b_dirent_ret, and *b_vp_ret are left alone.
+ */
+static int
+tmpfs_rename_lock(struct mount *mount, kauth_cred_t cred, int overlap_error,
+    struct vnode *a_dvp, struct tmpfs_node *a_dnode,
+    struct componentname *a_cnp, bool a_missing_ok,
+    struct tmpfs_dirent **a_dirent_ret, struct vnode **a_vp_ret,
+    struct vnode *b_dvp, struct tmpfs_node *b_dnode,
+    struct componentname *b_cnp, bool b_missing_ok,
+    struct tmpfs_dirent **b_dirent_ret, struct vnode **b_vp_ret)
+{
+	struct tmpfs_dirent *a_dirent, *b_dirent;
+	struct vnode *a_vp, *b_vp;
+	int error;
 
-		fnode->tn_status |= TMPFS_NODE_CHANGED;
-		tdnode->tn_status |= TMPFS_NODE_MODIFIED;
+	KASSERT(a_dvp != NULL);
+	KASSERT(a_dnode != NULL);
+	KASSERT(a_cnp != NULL);
+	KASSERT(a_dirent_ret != NULL);
+	KASSERT(a_vp_ret != NULL);
+	KASSERT(b_dvp != NULL);
+	KASSERT(b_dnode != NULL);
+	KASSERT(b_cnp != NULL);
+	KASSERT(b_dirent_ret != NULL);
+	KASSERT(b_vp_ret != NULL);
+	KASSERT(a_dvp != b_dvp);
+	KASSERT(a_dnode != b_dnode);
+	KASSERT(a_dnode->tn_vnode == a_dvp);
+	KASSERT(b_dnode->tn_vnode == b_dvp);
+	KASSERT(a_dnode->tn_type == VDIR);
+	KASSERT(b_dnode->tn_type == VDIR);
+	KASSERT(a_missing_ok != b_missing_ok);
+
+	error = tmpfs_rename_lock_directory(a_dvp, a_dnode);
+	if (error)
+		goto fail0;
+
+	/* Did we lose a race with mount?  */
+	if (a_dvp->v_mountedhere != NULL) {
+		error = EBUSY;
+		goto fail1;
+	}
+
+	/* Make sure the caller may search the directory.  */
+	error = VOP_ACCESS(a_dvp, VEXEC, cred);
+	if (error)
+		goto fail1;
+
+	a_dirent = tmpfs_dir_lookup(a_dnode, a_cnp);
+	if (a_dirent != NULL) {
+		KASSERT(a_dirent->td_node != NULL);
+		/* We ruled out `.' earlier.  */
+		KASSERT(a_dirent->td_node != a_dnode);
+		/* We ruled out `..' earlier.  */
+		KASSERT(a_dirent->td_node !=
+		    a_dnode->tn_spec.tn_dir.tn_parent);
+		if (a_dirent->td_node == b_dnode) {
+			error = overlap_error;
+			goto fail1;
+		}
+		mutex_enter(&a_dirent->td_node->tn_vlock);
+		error = tmpfs_vnode_get(mount, a_dirent->td_node, &a_vp);
+		if (error)
+			goto fail1;
+		KASSERT(a_vp->v_mount == mount);
+		/* Refuse to rename (over) a mount point.  */
+		if ((a_vp->v_type == VDIR) && (a_vp->v_mountedhere != NULL)) {
+			error = EBUSY;
+			goto fail2;
+		}
+	} else if (!a_missing_ok) {
+		error = ENOENT;
+		goto fail1;
+	} else {
+		a_vp = NULL;
 	}
-out_ok:
-	/* Trigger the rename event. */
-	VN_KNOTE(fvp, NOTE_RENAME);
-	error = 0;
-out:
+	KASSERT(a_vp != a_dvp);
+	KASSERT(a_vp != b_dvp);
+
+	error = tmpfs_rename_lock_directory(b_dvp, b_dnode);
+	if (error)
+		goto fail2;
+
+	/* Did we lose a race with mount?  */
+	if (b_dvp->v_mountedhere != NULL) {
+		error = EBUSY;
+		goto fail3;
+	}
+
+	/* Make sure the caller may search the directory.  */
+	error = VOP_ACCESS(b_dvp, VEXEC, cred);
+	if (error)
+		goto fail3;
+
+	b_dirent = tmpfs_dir_lookup(b_dnode, b_cnp);
+	if (b_dirent != NULL) {
+		KASSERT(b_dirent->td_node != NULL);
+		/* We ruled out `.' earlier.  */
+		KASSERT(b_dirent->td_node != b_dnode);
+		/* We ruled out `..' earlier.  */
+		KASSERT(b_dirent->td_node !=
+		    b_dnode->tn_spec.tn_dir.tn_parent);
+		/* b is not an ancestor of a.  */
+		KASSERT(b_dirent->td_node != a_dnode);
+		/* But the source and target nodes might be the same.  */
+		if ((a_dirent == NULL) ||
+		    (a_dirent->td_node != b_dirent->td_node)) {
+			mutex_enter(&b_dirent->td_node->tn_vlock);
+			error = tmpfs_vnode_get(mount, b_dirent->td_node,
+			    &b_vp);
+			if (error)
+				goto fail3;
+			KASSERT(b_vp->v_mount == mount);
+			KASSERT(a_vp != b_vp);
+			/* Refuse to rename (over) a mount point.  */
+			if ((b_vp->v_type == VDIR) &&
+			    (b_vp->v_mountedhere != NULL)) {
+				error = EBUSY;
+				goto fail4;
+			}
+		} else {
+			b_vp = a_vp;
+			vref(b_vp);
+		}
+	} else if (!b_missing_ok) {
+		error = ENOENT;
+		goto fail3;
+	} else {
+		b_vp = NULL;
+	}
+	KASSERT(b_vp != a_dvp);
+	KASSERT(b_vp != b_dvp);
+
+	KASSERT(VOP_ISLOCKED(a_dvp) == LK_EXCLUSIVE);
+	KASSERT(VOP_ISLOCKED(b_dvp) == LK_EXCLUSIVE);
+	KASSERT(a_missing_ok || (a_dirent != NULL));
+	KASSERT(a_missing_ok || (a_dirent->td_node != NULL));
+	KASSERT(b_missing_ok || (b_dirent != NULL));
+	KASSERT(b_missing_ok || (b_dirent->td_node != NULL));
+	KASSERT((a_dirent == NULL) || (a_dirent->td_node != NULL));
+	KASSERT((a_dirent == NULL) || (a_dirent->td_node->tn_vnode == a_vp));
+	KASSERT((b_dirent == NULL) || (b_dirent->td_node != NULL));
+	KASSERT((b_dirent == NULL) || (b_dirent->td_node->tn_vnode == b_vp));
+	KASSERT((a_vp == NULL) || (VOP_ISLOCKED(a_vp) == LK_EXCLUSIVE));
+	KASSERT((b_vp == NULL) || (VOP_ISLOCKED(b_vp) == LK_EXCLUSIVE));
+
+	*a_dirent_ret = a_dirent;
+	*b_dirent_ret = b_dirent;
+	*a_vp_ret = a_vp;
+	*b_vp_ret = b_vp;
+	return 0;
+
+fail4:	if (b_vp != NULL) {
+		KASSERT(VOP_ISLOCKED(b_vp) == LK_EXCLUSIVE);
+		if (b_vp != a_vp)
+			vput(b_vp);
+		else
+			vrele(a_vp);
+	}
+
+fail3:	KASSERT(VOP_ISLOCKED(b_dvp) == LK_EXCLUSIVE);
+	VOP_UNLOCK(b_dvp);
+
+fail2:	if (a_vp != NULL) {
+		KASSERT(VOP_ISLOCKED(a_vp) == LK_EXCLUSIVE);
+		vput(a_vp);
+	}
+
+fail1:	KASSERT(VOP_ISLOCKED(a_dvp) == LK_EXCLUSIVE);
+	VOP_UNLOCK(a_dvp);
+
+fail0:	/* KASSERT(VOP_ISLOCKED(a_dvp) != LK_EXCLUSIVE); */
+	/* KASSERT(VOP_ISLOCKED(b_dvp) != LK_EXCLUSIVE); */
+	/* KASSERT((a_vp == NULL) || (VOP_ISLOCKED(a_vp) != LK_EXCLUSIVE)); */
+	/* KASSERT((b_vp == NULL) || (VOP_ISLOCKED(b_vp) != LK_EXCLUSIVE)); */
+	return error;
+}
+
+/*
+ * Shuffle the directory entries to move fvp from the directory fdvp
+ * into the directory tdvp.  fde is fvp's directory entry in fdvp.  If
+ * we are overwriting a target node, it is tvp, and tde is its
+ * directory entry in tdvp; otherwise tde and tvp are both NULL.
+ *
+ * fdvp, fvp, tdvp, and tvp (if not NULL) must be locked and referenced.
+ */
+static void
+tmpfs_rename_attachdetach(struct tmpfs_mount *tmpfs,
+    struct vnode *fdvp, struct tmpfs_dirent *fde, struct vnode *fvp,
+    struct vnode *tdvp, struct tmpfs_dirent *tde, struct vnode *tvp)
+{
+
+	KASSERT(tmpfs != NULL);
+	KASSERT(fdvp != NULL);
+	KASSERT(fde != NULL);
+	KASSERT(fvp != NULL);
+	KASSERT(tdvp != NULL);
+	KASSERT(fde->td_node != NULL);
+	KASSERT(fde->td_node->tn_vnode == fvp);
+	KASSERT((tde == NULL) == (tvp == NULL));
+	KASSERT((tde == NULL) || (tde->td_node != NULL));
+	KASSERT((tde == NULL) || (tde->td_node->tn_vnode == tvp));
+	KASSERT(VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE);
+	KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
+	KASSERT(VOP_ISLOCKED(fvp) == LK_EXCLUSIVE);
+	KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE));
+
+	/*
+	 * If we are moving from one directory to another, detach the
+	 * source entry and reattach it to the target directory.
+	 */
 	if (fdvp != tdvp) {
-		VOP_UNLOCK(fdvp);
+		/* tmpfs_dir_detach clobbers fde->td_node, so save it.  */
+		struct tmpfs_node *fnode = fde->td_node;
+		tmpfs_dir_detach(fdvp, fde);
+		tmpfs_dir_attach(tdvp, fde, fnode);
+	} else if (tvp == NULL) {
+		/*
+		 * Same directory, no target to replace: the directory
+		 * still changes, but neither tmpfs_dir_attach nor
+		 * tmpfs_dir_detach (which note the event for us) is
+		 * called on this path, so note the write explicitly.
+		 */
+		VN_KNOTE(fdvp, NOTE_WRITE);
 	}
-	if (fvp != tvp) {
-		VOP_UNLOCK(fvp);
+
+	/*
+	 * If we are replacing an existing target entry, delete it.
+	 */
+	if (tde != NULL) {
+		KASSERT(tvp != NULL);
+		KASSERT(tde->td_node != NULL);
+		KASSERT((fvp->v_type == VDIR) == (tvp->v_type == VDIR));
+		if (tde->td_node->tn_type == VDIR) {
+			KASSERT(tde->td_node->tn_size == 0);
+			KASSERT(tde->td_node->tn_links == 2);
+			/* Decrement the extra link count for `.' so
+			 * the vnode will be recycled when released.  */
+			tde->td_node->tn_links--;
+		}
+		tmpfs_dir_detach(tdvp, tde);
+		tmpfs_free_dirent(tmpfs, tde);
 	}
-out_unlocked:
-	/* Release target nodes. */
-	if (tdvp == tvp) {
-		vrele(tdvp);
-	} else {
-		vput(tdvp);
+}
+
+/*
+ * Remove the entry de for the non-directory vp from the directory dvp.
+ * Returns 0 once de is detached and freed, else an error (de untouched).
+ * Everything must be locked and referenced.
+ */
+static int
+tmpfs_do_remove(struct tmpfs_mount *tmpfs, struct vnode *dvp,
+    struct tmpfs_node *dnode, struct tmpfs_dirent *de, struct vnode *vp,
+    kauth_cred_t cred)
+{
+	int error;
+
+	KASSERT(tmpfs != NULL);
+	KASSERT(dvp != NULL);
+	KASSERT(dnode != NULL);
+	KASSERT(de != NULL);
+	KASSERT(vp != NULL);
+	KASSERT(dnode->tn_vnode == dvp);
+	KASSERT(de->td_node != NULL);
+	KASSERT(de->td_node->tn_vnode == vp);
+	KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE);
+	KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE);
+
+	error = tmpfs_remove_check_possible(dnode, de->td_node);
+	if (error)
+		return error;
+
+	error = tmpfs_remove_check_permitted(cred, dnode, de->td_node);
+	error = kauth_authorize_vnode(cred, KAUTH_VNODE_DELETE, vp, dvp,
+	    error);
+	if (error)
+		return error;
+
+	tmpfs_dir_detach(dvp, de);
+	tmpfs_free_dirent(tmpfs, de);
+
+	return 0;
+}
+
+/*
+ * Check whether a rename is possible independent of credentials.
+ * Returns 0 if so, or EPERM if a tn_flags bit (see below) forbids it.
+ * Everything must be locked and referenced; tnode may be NULL.
+ */
+static int
+tmpfs_rename_check_possible(
+    struct tmpfs_node *fdnode, struct tmpfs_node *fnode,
+    struct tmpfs_node *tdnode, struct tmpfs_node *tnode)
+{
+
+	KASSERT(fdnode != NULL);
+	KASSERT(fnode != NULL);
+	KASSERT(tdnode != NULL);
+	KASSERT(fdnode != fnode);
+	KASSERT(tdnode != tnode);
+	KASSERT(fnode != tnode);
+	KASSERT(fdnode->tn_vnode != NULL);
+	KASSERT(fnode->tn_vnode != NULL);
+	KASSERT(tdnode->tn_vnode != NULL);
+	KASSERT((tnode == NULL) || (tnode->tn_vnode != NULL));
+	KASSERT(VOP_ISLOCKED(fdnode->tn_vnode) == LK_EXCLUSIVE);
+	KASSERT(VOP_ISLOCKED(fnode->tn_vnode) == LK_EXCLUSIVE);
+	KASSERT(VOP_ISLOCKED(tdnode->tn_vnode) == LK_EXCLUSIVE);
+	KASSERT((tnode == NULL) ||
+	    (VOP_ISLOCKED(tnode->tn_vnode) == LK_EXCLUSIVE));
+
+	/*
+	 * If fdnode is immutable, we can't write to it.  If fdnode is
+	 * append-only, the only change we can make is to add entries
+	 * to it.  If fnode is immutable, we can't change the links to
+	 * it.  If fnode is append-only...well, this is what UFS does.
+	 */
+	if ((fdnode->tn_flags | fnode->tn_flags) & (IMMUTABLE | APPEND))
+		return EPERM;
+
+	/*
+	 * If tdnode is immutable, we can't write to it.  If tdnode is
+	 * append-only, we can add entries, but we can't change
+	 * existing entries.
+	 */
+	if (tdnode->tn_flags & (IMMUTABLE | (tnode? APPEND : 0)))
+		return EPERM;
+
+	/*
+	 * If tnode is immutable, we can't replace links to it.  If
+	 * tnode is append-only...well, this is what UFS does.
+	 */
+	if (tnode != NULL) {
+		KASSERT(tnode != NULL);
+		if ((tnode->tn_flags & (IMMUTABLE | APPEND)) != 0)
+			return EPERM;
 	}
-	if (tvp) {
-		vput(tvp);
+
+	return 0;
+}
+
+/*
+ * Check whether a rename is permitted given our credentials.
+ * Returns 0 if so, or the first VOP_ACCESS/sticky-check error if not.
+ * Everything must be locked and referenced; tnode may be NULL.
+ */
+static int
+tmpfs_rename_check_permitted(kauth_cred_t cred,
+    struct tmpfs_node *fdnode, struct tmpfs_node *fnode,
+    struct tmpfs_node *tdnode, struct tmpfs_node *tnode)
+{
+	int error;
+
+	KASSERT(fdnode != NULL);
+	KASSERT(fnode != NULL);
+	KASSERT(tdnode != NULL);
+	KASSERT(fdnode != fnode);
+	KASSERT(tdnode != tnode);
+	KASSERT(fnode != tnode);
+	KASSERT(fdnode->tn_vnode != NULL);
+	KASSERT(fnode->tn_vnode != NULL);
+	KASSERT(tdnode->tn_vnode != NULL);
+	KASSERT((tnode == NULL) || (tnode->tn_vnode != NULL));
+	KASSERT(VOP_ISLOCKED(fdnode->tn_vnode) == LK_EXCLUSIVE);
+	KASSERT(VOP_ISLOCKED(fnode->tn_vnode) == LK_EXCLUSIVE);
+	KASSERT(VOP_ISLOCKED(tdnode->tn_vnode) == LK_EXCLUSIVE);
+	KASSERT((tnode == NULL) ||
+	    (VOP_ISLOCKED(tnode->tn_vnode) == LK_EXCLUSIVE));
+
+	/*
+	 * We need to remove or change an entry in the source directory.
+	 */
+	error = VOP_ACCESS(fdnode->tn_vnode, VWRITE, cred);
+	if (error)
+		return error;
+
+	/*
+	 * If we are changing directories, then we need to write to the
+	 * target directory to add or change an entry.  Also, if fnode
+	 * is a directory, we need to write to it to change its `..'
+	 * entry.
+	 */
+	if (fdnode != tdnode) {
+		error = VOP_ACCESS(tdnode->tn_vnode, VWRITE, cred);
+		if (error)
+			return error;
+		if (fnode->tn_type == VDIR) {
+			error = VOP_ACCESS(fnode->tn_vnode, VWRITE, cred);
+			if (error)
+				return error;
+		}
 	}
 
-	/* Release source nodes. */
-	vrele(fdvp);
-	vrele(fvp);
+	error = tmpfs_check_sticky(cred, fdnode, fnode);
+	if (error)
+		return error;
 
-	if (newname != NULL) {
-		tmpfs_strname_free(tmp, newname, namelen);
+	error = tmpfs_check_sticky(cred, tdnode, tnode);
+	if (error)
+		return error;
+
+	return 0;
+}
+
+/*
+ * Check whether removing node's entry in dnode is possible independent
+ * of credentials.  Returns 0 if so, or EPERM if either dnode or node
+ * has IMMUTABLE or APPEND set in tn_flags.
+ * Everything must be locked and referenced.
+ */
+static int
+tmpfs_remove_check_possible(struct tmpfs_node *dnode, struct tmpfs_node *node)
+{
+
+	KASSERT(dnode != NULL);
+	KASSERT(dnode->tn_vnode != NULL);
+	KASSERT(node != NULL);
+	KASSERT(dnode != node);
+	KASSERT(VOP_ISLOCKED(dnode->tn_vnode) == LK_EXCLUSIVE);
+	KASSERT(VOP_ISLOCKED(node->tn_vnode) == LK_EXCLUSIVE);
+
+	/*
+	 * We want to delete the entry.  If dnode is immutable, we
+	 * can't write to it to delete the entry.  If dnode is
+	 * append-only, the only change we can make is to add entries,
+	 * so we can't delete entries.  If node is immutable, we can't
+	 * change the links to it, so we can't delete the entry.  If
+	 * node is append-only...well, this is what UFS does.
+	 */
+	if ((dnode->tn_flags | node->tn_flags) & (IMMUTABLE | APPEND))
+		return EPERM;
+
+	return 0;
+}
+
+/*
+ * Check whether removing node's entry in dnode is permitted given our
+ * credentials.  Returns 0 if so, or else the error from VOP_ACCESS or
+ * tmpfs_check_sticky.
+ * Everything must be locked and referenced.
+ */
+static int
+tmpfs_remove_check_permitted(kauth_cred_t cred,
+    struct tmpfs_node *dnode, struct tmpfs_node *node)
+{
+	int error;
+
+	KASSERT(dnode != NULL);
+	KASSERT(dnode->tn_vnode != NULL);
+	KASSERT(node != NULL);
+	KASSERT(dnode != node);
+	KASSERT(VOP_ISLOCKED(dnode->tn_vnode) == LK_EXCLUSIVE);
+	KASSERT(VOP_ISLOCKED(node->tn_vnode) == LK_EXCLUSIVE);
+
+	/*
+	 * Check whether we are permitted to write to the directory
+	 * dnode in order to delete an entry from it.
+	 */
+	error = VOP_ACCESS(dnode->tn_vnode, VWRITE, cred);
+	if (error)
+		return error;
+
+	error = tmpfs_check_sticky(cred, dnode, node);
+	if (error)
+		return error;
+
+	return 0;
+}
+
+/*
+ * Check whether we may change an entry in a sticky directory.  If the
+ * directory is sticky, the user must own either the directory or, if
+ * it exists, the node, in order to change the entry.  Returns 0 if
+ * allowed (always, when node is NULL), or EPERM if not.
+ * Everything must be locked and referenced.
+ */
+static int
+tmpfs_check_sticky(kauth_cred_t cred,
+    struct tmpfs_node *dnode, struct tmpfs_node *node)
+{
+
+	KASSERT(dnode != NULL);
+	KASSERT(dnode->tn_vnode != NULL);
+	KASSERT(VOP_ISLOCKED(dnode->tn_vnode) == LK_EXCLUSIVE);
+	KASSERT((node == NULL) || (node->tn_vnode != NULL));
+	KASSERT((node == NULL) ||
+	    (VOP_ISLOCKED(node->tn_vnode) == LK_EXCLUSIVE));
+
+	if (dnode->tn_mode & S_ISTXT) {
+		uid_t euid = kauth_cred_geteuid(cred);
+		if (euid == dnode->tn_uid)
+			return 0;
+		if ((node == NULL) || (euid == node->tn_uid))
+			return 0;
+		return EPERM;
 	}
-	return error;
+
+	return 0;
 }
 
 int

Reply via email to