Module Name: src
Committed By: riastradh
Date: Thu Aug 18 21:42:19 UTC 2011

Modified Files:
    src/sys/fs/tmpfs: tmpfs_vnops.c

Log Message:
Fix tmpfs_rename locking. Fixes PR kern/36681.

tmpfs now survives dirconc, all our vfs/tmpfs tests and rename races in atf, and a bunch of hand-written tests that I'd commit if atf didn't find them highly indigestible.

ok dholland

To generate a diff of this commit:
cvs rdiff -u -r1.88 -r1.89 src/sys/fs/tmpfs/tmpfs_vnops.c

Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/fs/tmpfs/tmpfs_vnops.c diff -u src/sys/fs/tmpfs/tmpfs_vnops.c:1.88 src/sys/fs/tmpfs/tmpfs_vnops.c:1.89 --- src/sys/fs/tmpfs/tmpfs_vnops.c:1.88 Wed Jul 13 03:28:41 2011 +++ src/sys/fs/tmpfs/tmpfs_vnops.c Thu Aug 18 21:42:18 2011 @@ -1,4 +1,4 @@ -/* $NetBSD: tmpfs_vnops.c,v 1.88 2011/07/13 03:28:41 riastradh Exp $ */ +/* $NetBSD: tmpfs_vnops.c,v 1.89 2011/08/18 21:42:18 riastradh Exp $ */ /* * Copyright (c) 2005, 2006, 2007 The NetBSD Foundation, Inc. @@ -35,7 +35,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: tmpfs_vnops.c,v 1.88 2011/07/13 03:28:41 riastradh Exp $"); +__KERNEL_RCSID(0, "$NetBSD: tmpfs_vnops.c,v 1.89 2011/08/18 21:42:18 riastradh Exp $"); #include <sys/param.h> #include <sys/dirent.h> @@ -798,27 +798,8 @@ } /* - * tmpfs_parentcheck_p: check if 'lower' is a descendent of 'upper'. - * - * => Returns 'true' if parent, and 'false' otherwise. - */ -static inline bool -tmpfs_parentcheck_p(tmpfs_node_t *lower, tmpfs_node_t *upper) -{ - tmpfs_node_t *un = lower; - - while (un != un->tn_spec.tn_dir.tn_parent) { - KASSERT(un->tn_type == VDIR); - if (un == upper) { - return true; - } - un = un->tn_spec.tn_dir.tn_parent; - } - return false; -} - -/* - * tmpfs_rename: rename routine. + * tmpfs_rename: rename routine, the hairiest system call, with the + * insane API. * * Arguments: fdvp (from-parent vnode), fvp (from-leaf), tdvp (to-parent) * and tvp (to-leaf), if exists (NULL if not). @@ -829,6 +810,57 @@ * => Both tdvp and tvp are referenced and locked. It is our responsibility * to release the references and unlock them (or destroy). */ + +/* + * First, some forward declarations of subroutines. 
+ */ + +static int tmpfs_sane_rename(struct vnode *, struct componentname *, + struct vnode *, struct componentname *, kauth_cred_t, bool); +static int tmpfs_rename_enter(struct mount *, struct tmpfs_mount *, + kauth_cred_t, + struct vnode *, struct tmpfs_node *, struct componentname *, + struct tmpfs_dirent **, struct vnode **, + struct vnode *, struct tmpfs_node *, struct componentname *, + struct tmpfs_dirent **, struct vnode **); +static int tmpfs_rename_enter_common(struct mount *, struct tmpfs_mount *, + kauth_cred_t, + struct vnode *, struct tmpfs_node *, + struct componentname *, struct tmpfs_dirent **, struct vnode **, + struct componentname *, struct tmpfs_dirent **, struct vnode **); +static int tmpfs_rename_enter_separate(struct mount *, struct tmpfs_mount *, + kauth_cred_t, + struct vnode *, struct tmpfs_node *, struct componentname *, + struct tmpfs_dirent **, struct vnode **, + struct vnode *, struct tmpfs_node *, struct componentname *, + struct tmpfs_dirent **, struct vnode **); +static void tmpfs_rename_exit(struct tmpfs_mount *, + struct vnode *, struct vnode *, struct vnode *, struct vnode *); +static int tmpfs_rename_lock_directory(struct vnode *, struct tmpfs_node *); +static int tmpfs_rename_genealogy(struct tmpfs_node *, struct tmpfs_node *, + struct tmpfs_node **); +static int tmpfs_rename_lock(struct mount *, kauth_cred_t, int, + struct vnode *, struct tmpfs_node *, struct componentname *, bool, + struct tmpfs_dirent **, struct vnode **, + struct vnode *, struct tmpfs_node *, struct componentname *, bool, + struct tmpfs_dirent **, struct vnode **); +static void tmpfs_rename_attachdetach(struct tmpfs_mount *, + struct vnode *, struct tmpfs_dirent *, struct vnode *, + struct vnode *, struct tmpfs_dirent *, struct vnode *); +static int tmpfs_do_remove(struct tmpfs_mount *, struct vnode *, + struct tmpfs_node *, struct tmpfs_dirent *, struct vnode *, kauth_cred_t); +static int tmpfs_rename_check_possible(struct tmpfs_node *, + struct 
tmpfs_node *, struct tmpfs_node *, struct tmpfs_node *); +static int tmpfs_rename_check_permitted(kauth_cred_t, + struct tmpfs_node *, struct tmpfs_node *, + struct tmpfs_node *, struct tmpfs_node *); +static int tmpfs_remove_check_possible(struct tmpfs_node *, + struct tmpfs_node *); +static int tmpfs_remove_check_permitted(kauth_cred_t, + struct tmpfs_node *, struct tmpfs_node *); +static int tmpfs_check_sticky(kauth_cred_t, + struct tmpfs_node *, struct tmpfs_node *); + int tmpfs_rename(void *v) { @@ -840,198 +872,1196 @@ struct vnode *a_tvp; struct componentname *a_tcnp; } */ *ap = v; - vnode_t *fdvp = ap->a_fdvp; - vnode_t *fvp = ap->a_fvp; + struct vnode *fdvp = ap->a_fdvp; + struct vnode *fvp = ap->a_fvp; struct componentname *fcnp = ap->a_fcnp; - vnode_t *tdvp = ap->a_tdvp; - vnode_t *tvp = ap->a_tvp; + struct vnode *tdvp = ap->a_tdvp; + struct vnode *tvp = ap->a_tvp; struct componentname *tcnp = ap->a_tcnp; - tmpfs_node_t *fdnode, *fnode, *tnode, *tdnode; - tmpfs_dirent_t *de; - tmpfs_mount_t *tmp; - size_t namelen; - char *newname; + kauth_cred_t cred; int error; - KASSERT(VOP_ISLOCKED(tdvp)); - KASSERT(tvp == NULL || VOP_ISLOCKED(tvp) == LK_EXCLUSIVE); - KASSERT((fcnp->cn_flags & ISDOTDOT) == 0); - KASSERT((tcnp->cn_flags & ISDOTDOT) == 0); + KASSERT(fdvp != NULL); + KASSERT(fvp != NULL); + KASSERT(fcnp != NULL); + KASSERT(fcnp->cn_nameptr != NULL); + KASSERT(tdvp != NULL); + KASSERT(tcnp != NULL); + KASSERT(fcnp->cn_nameptr != NULL); + /* KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ + /* KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */ + KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE); + KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE)); + KASSERT(fdvp->v_type == VDIR); + KASSERT(tdvp->v_type == VDIR); - newname = NULL; - namelen = 0; - tmp = NULL; - - /* Disallow cross-device renames. 
*/ - if (fvp->v_mount != tdvp->v_mount || - (tvp != NULL && fvp->v_mount != tvp->v_mount)) { - error = EXDEV; - goto out_unlocked; - } + cred = fcnp->cn_cred; + KASSERT(tcnp->cn_cred == cred); + + /* + * Sanitize our world from the VFS insanity. Unlock the target + * directory and node, which are locked. Release the children, + * which are referenced. Check for rename("x", "y/."), which + * it is our responsibility to reject, not the caller's. (But + * the caller does reject rename("x/.", "y"). Go figure.) + */ - fnode = VP_TO_TMPFS_NODE(fvp); - fdnode = VP_TO_TMPFS_DIR(fdvp); - tnode = (tvp == NULL) ? NULL : VP_TO_TMPFS_NODE(tvp); - tdnode = VP_TO_TMPFS_DIR(tdvp); - tmp = VFS_TO_TMPFS(tdvp->v_mount); + VOP_UNLOCK(tdvp); + if ((tvp != NULL) && (tvp != tdvp)) + VOP_UNLOCK(tvp); - if (fdvp == tvp) { - error = 0; - goto out_unlocked; + vrele(fvp); + if (tvp != NULL) + vrele(tvp); + + if (tvp == tdvp) { + error = EINVAL; + goto out; } - /* Allocate memory, if necessary, for a new name. */ - namelen = tcnp->cn_namelen; + error = tmpfs_sane_rename(fdvp, fcnp, tdvp, tcnp, cred, false); + +out: /* + * All done, whether with success or failure. Release the + * directory nodes now, as the caller expects from the VFS + * protocol. + */ + vrele(fdvp); + vrele(tdvp); + + return error; +} + +/* + * tmpfs_sane_rename: rename routine, the hairiest system call, with + * the sane API. + * + * Arguments: + * + * . fdvp (from directory vnode), + * . fcnp (from component name), + * . tdvp (to directory vnode), and + * . tcnp (to component name). + * + * fdvp and tdvp must be referenced and unlocked. 
+ */ +static int +tmpfs_sane_rename(struct vnode *fdvp, struct componentname *fcnp, + struct vnode *tdvp, struct componentname *tcnp, kauth_cred_t cred, + bool posixly_correct) +{ + struct mount *mount; + struct tmpfs_mount *tmpfs; + struct tmpfs_node *fdnode, *tdnode; + struct tmpfs_dirent *fde, *tde; + struct vnode *fvp, *tvp; + char *newname; + int error; + + KASSERT(fdvp != NULL); + KASSERT(fcnp != NULL); + KASSERT(tdvp != NULL); + KASSERT(tcnp != NULL); + /* KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ + /* KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */ + KASSERT(fdvp->v_type == VDIR); + KASSERT(tdvp->v_type == VDIR); + KASSERT(fdvp->v_mount == tdvp->v_mount); + KASSERT((fcnp->cn_flags & ISDOTDOT) == 0); + KASSERT((tcnp->cn_flags & ISDOTDOT) == 0); + KASSERT((fcnp->cn_namelen != 1) || (fcnp->cn_nameptr[0] != '.')); + KASSERT((tcnp->cn_namelen != 1) || (tcnp->cn_nameptr[0] != '.')); + KASSERT((fcnp->cn_namelen != 2) || (fcnp->cn_nameptr[0] != '.') || + (fcnp->cn_nameptr[1] != '.')); + KASSERT((tcnp->cn_namelen != 2) || (tcnp->cn_nameptr[0] != '.') || + (tcnp->cn_nameptr[1] != '.')); + + /* + * Pull out the tmpfs data structures. + */ + fdnode = VP_TO_TMPFS_NODE(fdvp); + tdnode = VP_TO_TMPFS_NODE(tdvp); + KASSERT(fdnode != NULL); + KASSERT(tdnode != NULL); + KASSERT(fdnode->tn_vnode == fdvp); + KASSERT(tdnode->tn_vnode == tdvp); + KASSERT(fdnode->tn_type == VDIR); + KASSERT(tdnode->tn_type == VDIR); + + mount = fdvp->v_mount; + KASSERT(mount != NULL); + KASSERT(mount == tdvp->v_mount); + /* XXX How can we be sure this stays true? (Not that you're + * likely to mount a tmpfs read-only...) */ + KASSERT((mount->mnt_flag & MNT_RDONLY) == 0); + tmpfs = VFS_TO_TMPFS(mount); + KASSERT(tmpfs != NULL); + + /* + * Decide whether we need a new name, and allocate memory for + * it if so. Do this before locking anything or taking + * destructive actions so that we can back out safely and sleep + * safely. XXX Is sleeping an issue here? 
Can this just be + * moved into tmpfs_rename_attachdetach? + */ if (tmpfs_strname_neqlen(fcnp, tcnp)) { - newname = tmpfs_strname_alloc(tmp, namelen); + newname = tmpfs_strname_alloc(tmpfs, tcnp->cn_namelen); if (newname == NULL) { error = ENOSPC; goto out_unlocked; } + } else { + newname = NULL; } - /* XXX: Lock order violation! */ - if (fdvp != tdvp) { - vn_lock(fdvp, LK_EXCLUSIVE | LK_RETRY); - } - if (fvp != tvp) { - vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY); - } + /* + * Lock and look up everything. GCC is not very clever. + */ + fde = tde = NULL; + fvp = tvp = NULL; + error = tmpfs_rename_enter(mount, tmpfs, cred, + fdvp, fdnode, fcnp, &fde, &fvp, + tdvp, tdnode, tcnp, &tde, &tvp); + if (error) + goto out_unlocked; - /* If the inode we were renaming has scarpered, just give up. */ - de = tmpfs_dir_lookup(fdnode, fcnp); - if (de == NULL || de->td_node != fnode) { - error = ENOENT; - goto out; - } + /* + * Check that everything is locked and looks right. + */ + KASSERT(fde != NULL); + KASSERT(fvp != NULL); + KASSERT(fde->td_node != NULL); + KASSERT(fde->td_node->tn_vnode == fvp); + KASSERT(fde->td_node->tn_type == fvp->v_type); + KASSERT((tde == NULL) == (tvp == NULL)); + KASSERT((tde == NULL) || (tde->td_node != NULL)); + KASSERT((tde == NULL) || (tde->td_node->tn_vnode == tvp)); + KASSERT((tde == NULL) || (tde->td_node->tn_type == tvp->v_type)); + KASSERT(VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE); + KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE); + KASSERT(VOP_ISLOCKED(fvp) == LK_EXCLUSIVE); + KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE)); /* - * If source and target is the same vnode - it is either invalid - * rename of a directory, or a hard link. Remove the source link, - * if the later. + * If the source and destination are the same object, we need + * only at most delete the source entry. */ if (fvp == tvp) { - if (fvp->v_type == VDIR) { + KASSERT(tvp != NULL); + if (fde->td_node->tn_type == VDIR) { + /* XXX How can this possibly happen? 
*/ error = EINVAL; - goto out; + goto out_locked; } - /* - * Detach and free the directory entry. Drops the link - * count on the inode. - */ - KASSERT(fnode == tnode); - tmpfs_dir_detach(fdvp, de); - tmpfs_free_dirent(tmp, de); - goto out_ok; + if (!posixly_correct && (fde != tde)) { + /* XXX Doesn't work because of locking. + * error = VOP_REMOVE(fdvp, fvp); + */ + error = tmpfs_do_remove(tmpfs, fdvp, fdnode, fde, fvp, + cred); + if (error) + goto out_locked; + } + goto success; } + KASSERT(fde != tde); + KASSERT(fvp != tvp); - /* If replacing an existing entry, ensure we can do the operation. */ + /* + * If the target exists, refuse to rename a directory over a + * non-directory or vice versa, or to clobber a non-empty + * directory. + */ if (tvp != NULL) { - KASSERT(tnode != NULL); - if (fnode->tn_type == VDIR && tnode->tn_type == VDIR) { - if (tnode->tn_size > 0) { - error = ENOTEMPTY; - goto out; - } - } else if (fnode->tn_type == VDIR && tnode->tn_type != VDIR) { + KASSERT(tde != NULL); + KASSERT(tde->td_node != NULL); + if (fvp->v_type == VDIR && tvp->v_type == VDIR) + error = ((tde->td_node->tn_size > 0)? ENOTEMPTY : 0); + else if (fvp->v_type == VDIR && tvp->v_type != VDIR) error = ENOTDIR; - goto out; - } else if (fnode->tn_type != VDIR && tnode->tn_type == VDIR) { + else if (fvp->v_type != VDIR && tvp->v_type == VDIR) error = EISDIR; - goto out; + else + error = 0; + if (error) + goto out_locked; + KASSERT((fvp->v_type == VDIR) == (tvp->v_type == VDIR)); + } + + /* + * Authorize the rename. + */ + error = tmpfs_rename_check_possible(fdnode, fde->td_node, + tdnode, (tde? tde->td_node : NULL)); + if (error) + goto out_locked; + error = tmpfs_rename_check_permitted(cred, fdnode, fde->td_node, + tdnode, (tde? tde->td_node : NULL)); + error = kauth_authorize_vnode(cred, KAUTH_VNODE_DELETE, fvp, fdvp, + error); + error = kauth_authorize_vnode(cred, KAUTH_VNODE_RENAME, tvp, tdvp, + error); + if (error) + goto out_locked; + + /* + * Everything is hunky-dory. 
Shuffle the directory entries. + */ + tmpfs_rename_attachdetach(tmpfs, fdvp, fde, fvp, tdvp, tde, tvp); + + /* + * Update the directory entry's name necessary, and flag + * metadata updates. A memory allocation failure here is not + * OK because we've already committed some changes that we + * can't back out at this point, and we have things locked so + * we can't sleep, hence the early allocation above. + */ + if (newname != NULL) { + KASSERT(tcnp->cn_namelen <= MAXNAMLEN); + + tmpfs_strname_free(tmpfs, fde->td_name, fde->td_namelen); + fde->td_namelen = (uint16_t)tcnp->cn_namelen; + (void)memcpy(newname, tcnp->cn_nameptr, tcnp->cn_namelen); + /* Commit newname and don't free it on the way out. */ + fde->td_name = newname; + newname = NULL; + + fde->td_node->tn_status |= TMPFS_NODE_CHANGED; + tdnode->tn_status |= TMPFS_NODE_MODIFIED; + } + +success: + VN_KNOTE(fvp, NOTE_RENAME); + error = 0; + +out_locked: + tmpfs_rename_exit(tmpfs, fdvp, fvp, tdvp, tvp); + +out_unlocked: + /* KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ + /* KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */ + /* KASSERT((fvp == NULL) || (VOP_ISLOCKED(fvp) != LK_EXCLUSIVE)); */ + /* KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */ + + if (newname != NULL) + tmpfs_strname_free(tmpfs, newname, tcnp->cn_namelen); + + return error; +} + +/* + * Look up fcnp in fdnode/fdvp and store its directory entry in fde_ret + * and the associated vnode in fvp_ret; fail if not found. Look up + * tcnp in tdnode/tdvp and store its directory entry in tde_ret and the + * associated vnode in tvp_ret; store null instead if not found. Fail + * if anything has been mounted on any of the nodes involved. + * + * fdvp and tdvp must be referenced. + * + * On entry, nothing is locked. + * + * On success, everything is locked, and *fvp_ret, and *tvp_ret if + * nonnull, are referenced. The only pairs of vnodes that may be + * identical are {fdvp, tdvp} and {fvp, tvp}. 
+ * + * On failure, everything remains as was. + * + * Locking everything including the source and target nodes is + * necessary to make sure that, e.g., link count updates are OK. The + * locking order is, in general, ancestor-first, matching the order you + * need to use to look up a descendant anyway. + */ +static int +tmpfs_rename_enter(struct mount *mount, struct tmpfs_mount *tmpfs, + kauth_cred_t cred, + struct vnode *fdvp, struct tmpfs_node *fdnode, struct componentname *fcnp, + struct tmpfs_dirent **fde_ret, struct vnode **fvp_ret, + struct vnode *tdvp, struct tmpfs_node *tdnode, struct componentname *tcnp, + struct tmpfs_dirent **tde_ret, struct vnode **tvp_ret) +{ + int error; + + KASSERT(mount != NULL); + KASSERT(tmpfs != NULL); + KASSERT(fdvp != NULL); + KASSERT(fdnode != NULL); + KASSERT(fcnp != NULL); + KASSERT(fde_ret != NULL); + KASSERT(fvp_ret != NULL); + KASSERT(tdvp != NULL); + KASSERT(tdnode != NULL); + KASSERT(tcnp != NULL); + KASSERT(tde_ret != NULL); + KASSERT(tvp_ret != NULL); + KASSERT(fdnode->tn_vnode == fdvp); + KASSERT(tdnode->tn_vnode == tdvp); + KASSERT(fdnode->tn_type == VDIR); + KASSERT(tdnode->tn_type == VDIR); + + if (fdvp == tdvp) { + KASSERT(fdnode == tdnode); + error = tmpfs_rename_enter_common(mount, tmpfs, cred, fdvp, + fdnode, fcnp, fde_ret, fvp_ret, tcnp, tde_ret, tvp_ret); + } else { + KASSERT(fdnode != tdnode); + error = tmpfs_rename_enter_separate(mount, tmpfs, cred, + fdvp, fdnode, fcnp, fde_ret, fvp_ret, + tdvp, tdnode, tcnp, tde_ret, tvp_ret); + } + + if (error) + return error; + + KASSERT(*fde_ret != NULL); + KASSERT(*fvp_ret != NULL); + KASSERT((*tde_ret == NULL) == (*tvp_ret == NULL)); + KASSERT((*tde_ret == NULL) || ((*tde_ret)->td_node != NULL)); + KASSERT((*tde_ret == NULL) || + ((*tde_ret)->td_node->tn_vnode == *tvp_ret)); + KASSERT(VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE); + KASSERT(VOP_ISLOCKED(*fvp_ret) == LK_EXCLUSIVE); + KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE); + KASSERT((*tvp_ret == NULL) || + 
(VOP_ISLOCKED(*tvp_ret) == LK_EXCLUSIVE)); + KASSERT(*fvp_ret != fdvp); + KASSERT(*fvp_ret != tdvp); + KASSERT(*tvp_ret != fdvp); + KASSERT(*tvp_ret != tdvp); + return 0; +} + +/* + * Lock and look up with a common source/target directory. + */ +static int +tmpfs_rename_enter_common(struct mount *mount, struct tmpfs_mount *tmpfs, + kauth_cred_t cred, + struct vnode *dvp, struct tmpfs_node *dnode, + struct componentname *fcnp, + struct tmpfs_dirent **fde_ret, struct vnode **fvp_ret, + struct componentname *tcnp, + struct tmpfs_dirent **tde_ret, struct vnode **tvp_ret) +{ + struct tmpfs_dirent *fde, *tde; + struct vnode *fvp, *tvp; + int error; + + error = tmpfs_rename_lock_directory(dvp, dnode); + if (error) + goto fail0; + + /* Did we lose a race with mount? */ + if (dvp->v_mountedhere != NULL) { + error = EBUSY; + goto fail1; + } + + /* Make sure the caller may read the directory. */ + error = VOP_ACCESS(dvp, VEXEC, cred); + if (error) + goto fail1; + + /* + * The order in which we lock the source and target nodes is + * irrelevant because there can only be one rename on this + * directory in flight at a time, and we have it locked. + */ + + fde = tmpfs_dir_lookup(dnode, fcnp); + if (fde == NULL) { + error = ENOENT; + goto fail1; + } + + KASSERT(fde->td_node != NULL); + /* We ruled out `.' earlier. */ + KASSERT(fde->td_node != dnode); + /* We ruled out `..' earlier. */ + KASSERT(fde->td_node != dnode->tn_spec.tn_dir.tn_parent); + mutex_enter(&fde->td_node->tn_vlock); + error = tmpfs_vnode_get(mount, fde->td_node, &fvp); + if (error) + goto fail1; + KASSERT(fvp != NULL); + KASSERT(VOP_ISLOCKED(fvp) == LK_EXCLUSIVE); + KASSERT(fvp != dvp); + KASSERT(fvp->v_mount == mount); + + /* Refuse to rename a mount point. */ + if ((fvp->v_type == VDIR) && (fvp->v_mountedhere != NULL)) { + error = EBUSY; + goto fail2; + } + + tde = tmpfs_dir_lookup(dnode, tcnp); + if (tde == NULL) { + tvp = NULL; + } else { + KASSERT(tde->td_node != NULL); + /* We ruled out `.' earlier. 
*/ + KASSERT(tde->td_node != dnode); + /* We ruled out `..' earlier. */ + KASSERT(tde->td_node != dnode->tn_spec.tn_dir.tn_parent); + if (tde->td_node != fde->td_node) { + mutex_enter(&tde->td_node->tn_vlock); + error = tmpfs_vnode_get(mount, tde->td_node, &tvp); + if (error) + goto fail2; + KASSERT(tvp->v_mount == mount); + /* Refuse to rename over a mount point. */ + if ((tvp->v_type == VDIR) && + (tvp->v_mountedhere != NULL)) { + error = EBUSY; + goto fail3; + } } else { - KASSERT(fnode->tn_type != VDIR); - KASSERT(tnode->tn_type != VDIR); + tvp = fvp; + vref(tvp); } + KASSERT(tvp != NULL); + KASSERT(VOP_ISLOCKED(tvp) == LK_EXCLUSIVE); } + KASSERT(tvp != dvp); - /* Are we moving the inode to a different directory? */ - if (fdvp != tdvp) { - /* - * If we are moving a directory - ensure that it is not - * parent of a target directory. Otherwise, it would - * result in stale nodes. - */ - if (fnode->tn_type == VDIR && - tmpfs_parentcheck_p(tdnode, fnode)) { - error = EINVAL; - goto out; - } + *fde_ret = fde; + *fvp_ret = fvp; + *tde_ret = tde; + *tvp_ret = tvp; + return 0; - /* - * Perform the move: detach from the source directory and - * attach into the target directory. - */ - tmpfs_dir_detach(fdvp, de); - tmpfs_dir_attach(tdvp, de, fnode); +fail3: if (tvp != NULL) { + if (tvp != fvp) + vput(tvp); + else + vrele(tvp); + } - } else if (tvp == NULL) { - /* Trigger the event, if not overwriting. */ - VN_KNOTE(tdvp, NOTE_WRITE); +fail2: vput(fvp); +fail1: VOP_UNLOCK(dvp); +fail0: return error; +} + +/* + * Lock and look up with separate source and target directories. 
+ */ +static int +tmpfs_rename_enter_separate(struct mount *mount, struct tmpfs_mount *tmpfs, + kauth_cred_t cred, + struct vnode *fdvp, struct tmpfs_node *fdnode, struct componentname *fcnp, + struct tmpfs_dirent **fde_ret, struct vnode **fvp_ret, + struct vnode *tdvp, struct tmpfs_node *tdnode, struct componentname *tcnp, + struct tmpfs_dirent **tde_ret, struct vnode **tvp_ret) +{ + struct tmpfs_node *intermediate_node; + struct tmpfs_dirent *fde, *tde; + struct vnode *fvp, *tvp; + int error; + + KASSERT(fdvp != tdvp); + KASSERT(fdnode != tdnode); + +#if 0 /* XXX */ + mutex_enter(&tmpfs->tm_rename_lock); +#endif + + error = tmpfs_rename_genealogy(fdnode, tdnode, &intermediate_node); + if (error) + goto fail; + + /* + * intermediate_node == NULL means fdnode is not an ancestor of + * tdnode. + */ + if (intermediate_node == NULL) + error = tmpfs_rename_lock(mount, cred, ENOTEMPTY, + tdvp, tdnode, tcnp, true, &tde, &tvp, + fdvp, fdnode, fcnp, false, &fde, &fvp); + else + error = tmpfs_rename_lock(mount, cred, EINVAL, + fdvp, fdnode, fcnp, false, &fde, &fvp, + tdvp, tdnode, tcnp, true, &tde, &tvp); + if (error) + goto fail; + + KASSERT(fde != NULL); + KASSERT(fde->td_node != NULL); + + /* + * Reject rename("foo/bar", "foo/bar/baz/quux/zot"). + */ + if (fde->td_node == intermediate_node) { + tmpfs_rename_exit(tmpfs, fdvp, fvp, tdvp, tvp); + return EINVAL; } - /* Are we overwriting the entry? */ + *fde_ret = fde; + *fvp_ret = fvp; + *tde_ret = tde; + *tvp_ret = tvp; + return 0; + +fail: +#if 0 /* XXX */ + mutex_exit(&tmpfs->tm_rename_lock); +#endif + return error; +} + +/* + * Unlock everything we locked for rename. + * + * fdvp and tdvp must be referenced. + * + * On entry, everything is locked, and fvp and tvp referenced. + * + * On exit, everything is unlocked, and fvp and tvp are released. 
+ */ +static void +tmpfs_rename_exit(struct tmpfs_mount *tmpfs, + struct vnode *fdvp, struct vnode *fvp, + struct vnode *tdvp, struct vnode *tvp) +{ + + KASSERT(tmpfs != NULL); + KASSERT(fdvp != NULL); + KASSERT(fvp != NULL); + KASSERT(fdvp != fvp); + KASSERT(fdvp != tvp); + KASSERT(tdvp != tvp); + KASSERT(tdvp != fvp); + KASSERT(VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE); + KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE); + KASSERT(VOP_ISLOCKED(fvp) == LK_EXCLUSIVE); + KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE)); + if (tvp != NULL) { - tmpfs_dirent_t *tde; + if (tvp != fvp) + vput(tvp); + else + vrele(tvp); + } + VOP_UNLOCK(tdvp); + vput(fvp); + if (fdvp != tdvp) + VOP_UNLOCK(fdvp); + +#if 0 /* XXX */ + if (fdvp != tdvp) + mutex_exit(&tmpfs->tm_rename_lock); +#endif +} + +/* + * Lock a directory, but fail if it has been rmdir'd. + * + * vp must be referenced. + */ +static int +tmpfs_rename_lock_directory(struct vnode *vp, struct tmpfs_node *node) +{ + + KASSERT(vp != NULL); + KASSERT(node != NULL); + KASSERT(node->tn_vnode == vp); + KASSERT(node->tn_type == VDIR); - tde = tmpfs_dir_cached(tnode); - if (tde == NULL) { - tde = tmpfs_dir_lookup(tdnode, tcnp); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); + if (node->tn_spec.tn_dir.tn_parent == NULL) { + VOP_UNLOCK(vp); + return ENOENT; + } + + return 0; +} + +/* + * Analyze the genealogy of the source and target nodes. + * + * On success, stores in *intermediate_node_ret either the child of + * fdnode of which tdnode is a descendant, or null if tdnode is not a + * descendant of fdnode at all. + * + * fdnode and tdnode must be unlocked and referenced. The file + * system's rename lock must also be held, to exclude concurrent + * changes to the file system's genealogy other than rmdir. + * + * XXX This causes an extra lock/unlock of tdnode in the case when + * we're just about to lock it again before locking anything else. 
+ * However, changing that requires reorganizing the code to make it + * even more horrifically obscure. + */ +static int +tmpfs_rename_genealogy(struct tmpfs_node *fdnode, struct tmpfs_node *tdnode, + struct tmpfs_node **intermediate_node_ret) +{ + struct tmpfs_node *node = tdnode, *parent; + int error; + + KASSERT(fdnode != NULL); + KASSERT(tdnode != NULL); + KASSERT(fdnode != tdnode); + KASSERT(intermediate_node_ret != NULL); + + KASSERT(fdnode->tn_vnode != NULL); + KASSERT(tdnode->tn_vnode != NULL); + KASSERT(fdnode->tn_type == VDIR); + KASSERT(tdnode->tn_type == VDIR); + + /* + * We need to provisionally lock tdnode->tn_vnode to keep rmdir + * from deleting it -- or any ancestor -- at an inopportune + * moment. + */ + error = tmpfs_rename_lock_directory(tdnode->tn_vnode, tdnode); + if (error) + return error; + + for (;;) { + parent = node->tn_spec.tn_dir.tn_parent; + KASSERT(parent != NULL); + KASSERT(parent->tn_type == VDIR); + + /* Did we hit the root without finding fdnode? */ + if (parent == node) { + *intermediate_node_ret = NULL; + break; } - KASSERT(tde && tde->td_node == tnode); - KASSERT((tnode->tn_type == VDIR) == (fnode->tn_type == VDIR)); - /* - * Remove and destroy the directory entry on the target - * directory, since we overwrite it. - */ - tmpfs_dir_detach(tdvp, tde); - tmpfs_free_dirent(tmp, tde); + /* Did we find that fdnode is an ancestor? */ + if (parent == fdnode) { + *intermediate_node_ret = node; + break; + } + + /* Neither -- keep ascending the family tree. */ + node = parent; } - /* If the name has changed, update directory entry. */ - if (newname != NULL) { - KASSERT(tcnp->cn_namelen < MAXNAMLEN); + VOP_UNLOCK(tdnode->tn_vnode); + return 0; +} - tmpfs_strname_free(tmp, de->td_name, de->td_namelen); - de->td_namelen = (uint16_t)namelen; - memcpy(newname, tcnp->cn_nameptr, namelen); - de->td_name = newname; - newname = NULL; +/* + * Lock directories a and b, which must be distinct, and look up and + * lock nodes a and b. 
Do a first and then b. Directory b may not be + * an ancestor of directory a, although directory a may be an ancestor + * of directory b. Fail with overlap_error if node a is directory b. + * Neither componentname may be `.' or `..'. + * + * a_dvp and b_dvp must be referenced. + * + * On entry, a_dvp and b_dvp are unlocked. + * + * On success, + * . a_dvp and b_dvp are locked, + * . *a_dirent_ret is filled with a directory entry whose node is + * locked and referenced, + * . *b_vp_ret is filled with the corresponding vnode, + * . *b_dirent_ret is filled either with null or with a directory entry + * whose node is locked and referenced, + * . *b_vp is filled either with null or with the corresponding vnode, + * and + * . the only pair of vnodes that may be identical is a_vp and b_vp. + * + * On failure, a_dvp and b_dvp are left unlocked, and *a_dirent_ret, + * *a_vp, *b_dirent_ret, and *b_vp are left alone. + */ +static int +tmpfs_rename_lock(struct mount *mount, kauth_cred_t cred, int overlap_error, + struct vnode *a_dvp, struct tmpfs_node *a_dnode, + struct componentname *a_cnp, bool a_missing_ok, + struct tmpfs_dirent **a_dirent_ret, struct vnode **a_vp_ret, + struct vnode *b_dvp, struct tmpfs_node *b_dnode, + struct componentname *b_cnp, bool b_missing_ok, + struct tmpfs_dirent **b_dirent_ret, struct vnode **b_vp_ret) +{ + struct tmpfs_dirent *a_dirent, *b_dirent; + struct vnode *a_vp, *b_vp; + int error; - fnode->tn_status |= TMPFS_NODE_CHANGED; - tdnode->tn_status |= TMPFS_NODE_MODIFIED; + KASSERT(a_dvp != NULL); + KASSERT(a_dnode != NULL); + KASSERT(a_cnp != NULL); + KASSERT(a_dirent_ret != NULL); + KASSERT(a_vp_ret != NULL); + KASSERT(b_dvp != NULL); + KASSERT(b_dnode != NULL); + KASSERT(b_cnp != NULL); + KASSERT(b_dirent_ret != NULL); + KASSERT(b_vp_ret != NULL); + KASSERT(a_dvp != b_dvp); + KASSERT(a_dnode != b_dnode); + KASSERT(a_dnode->tn_vnode == a_dvp); + KASSERT(b_dnode->tn_vnode == b_dvp); + KASSERT(a_dnode->tn_type == VDIR); + 
KASSERT(b_dnode->tn_type == VDIR); + KASSERT(a_missing_ok != b_missing_ok); + + error = tmpfs_rename_lock_directory(a_dvp, a_dnode); + if (error) + goto fail0; + + /* Did we lose a race with mount? */ + if (a_dvp->v_mountedhere != NULL) { + error = EBUSY; + goto fail1; + } + + /* Make sure the caller may read the directory. */ + error = VOP_ACCESS(a_dvp, VEXEC, cred); + if (error) + goto fail1; + + a_dirent = tmpfs_dir_lookup(a_dnode, a_cnp); + if (a_dirent != NULL) { + KASSERT(a_dirent->td_node != NULL); + /* We ruled out `.' earlier. */ + KASSERT(a_dirent->td_node != a_dnode); + /* We ruled out `..' earlier. */ + KASSERT(a_dirent->td_node != + a_dnode->tn_spec.tn_dir.tn_parent); + if (a_dirent->td_node == b_dnode) { + error = overlap_error; + goto fail1; + } + mutex_enter(&a_dirent->td_node->tn_vlock); + error = tmpfs_vnode_get(mount, a_dirent->td_node, &a_vp); + if (error) + goto fail1; + KASSERT(a_vp->v_mount == mount); + /* Refuse to rename (over) a mount point. */ + if ((a_vp->v_type == VDIR) && (a_vp->v_mountedhere != NULL)) { + error = EBUSY; + goto fail2; + } + } else if (!a_missing_ok) { + error = ENOENT; + goto fail1; + } else { + a_vp = NULL; } -out_ok: - /* Trigger the rename event. */ - VN_KNOTE(fvp, NOTE_RENAME); - error = 0; -out: + KASSERT(a_vp != a_dvp); + KASSERT(a_vp != b_dvp); + + error = tmpfs_rename_lock_directory(b_dvp, b_dnode); + if (error) + goto fail2; + + /* Did we lose a race with mount? */ + if (b_dvp->v_mountedhere != NULL) { + error = EBUSY; + goto fail3; + } + + /* Make sure the caller may read the directory. */ + error = VOP_ACCESS(b_dvp, VEXEC, cred); + if (error) + goto fail3; + + b_dirent = tmpfs_dir_lookup(b_dnode, b_cnp); + if (b_dirent != NULL) { + KASSERT(b_dirent->td_node != NULL); + /* We ruled out `.' earlier. */ + KASSERT(b_dirent->td_node != b_dnode); + /* We ruled out `..' earlier. */ + KASSERT(b_dirent->td_node != + b_dnode->tn_spec.tn_dir.tn_parent); + /* b is not an ancestor of a. 
*/ + KASSERT(b_dirent->td_node != a_dnode); + /* But the source and target nodes might be the same. */ + if ((a_dirent == NULL) || + (a_dirent->td_node != b_dirent->td_node)) { + mutex_enter(&b_dirent->td_node->tn_vlock); + error = tmpfs_vnode_get(mount, b_dirent->td_node, + &b_vp); + if (error) + goto fail3; + KASSERT(b_vp->v_mount == mount); + KASSERT(a_vp != b_vp); + /* Refuse to rename (over) a mount point. */ + if ((b_vp->v_type == VDIR) && + (b_vp->v_mountedhere != NULL)) { + error = EBUSY; + goto fail4; + } + } else { + b_vp = a_vp; + vref(b_vp); + } + } else if (!b_missing_ok) { + error = ENOENT; + goto fail3; + } else { + b_vp = NULL; + } + KASSERT(b_vp != a_dvp); + KASSERT(b_vp != b_dvp); + + KASSERT(VOP_ISLOCKED(a_dvp) == LK_EXCLUSIVE); + KASSERT(VOP_ISLOCKED(b_dvp) == LK_EXCLUSIVE); + KASSERT(a_missing_ok || (a_dirent != NULL)); + KASSERT(a_missing_ok || (a_dirent->td_node != NULL)); + KASSERT(b_missing_ok || (b_dirent != NULL)); + KASSERT(b_missing_ok || (b_dirent->td_node != NULL)); + KASSERT((a_dirent == NULL) || (a_dirent->td_node != NULL)); + KASSERT((a_dirent == NULL) || (a_dirent->td_node->tn_vnode == a_vp)); + KASSERT((b_dirent == NULL) || (b_dirent->td_node != NULL)); + KASSERT((b_dirent == NULL) || (b_dirent->td_node->tn_vnode == b_vp)); + KASSERT((a_vp == NULL) || (VOP_ISLOCKED(a_vp) == LK_EXCLUSIVE)); + KASSERT((b_vp == NULL) || (VOP_ISLOCKED(b_vp) == LK_EXCLUSIVE)); + + *a_dirent_ret = a_dirent; + *b_dirent_ret = b_dirent; + *a_vp_ret = a_vp; + *b_vp_ret = b_vp; + return 0; + +fail4: if (b_vp != NULL) { + KASSERT(VOP_ISLOCKED(b_vp) == LK_EXCLUSIVE); + if (b_vp != a_vp) + vput(b_vp); + else + vrele(a_vp); + } + +fail3: KASSERT(VOP_ISLOCKED(b_dvp) == LK_EXCLUSIVE); + VOP_UNLOCK(b_dvp); + +fail2: if (a_vp != NULL) { + KASSERT(VOP_ISLOCKED(a_vp) == LK_EXCLUSIVE); + vput(a_vp); + } + +fail1: KASSERT(VOP_ISLOCKED(a_dvp) == LK_EXCLUSIVE); + VOP_UNLOCK(a_dvp); + +fail0: /* KASSERT(VOP_ISLOCKED(a_dvp) != LK_EXCLUSIVE); */ + /* 
KASSERT(VOP_ISLOCKED(b_dvp) != LK_EXCLUSIVE); */
+	/* KASSERT((a_vp == NULL) || (VOP_ISLOCKED(a_vp) != LK_EXCLUSIVE)); */
+	/* KASSERT((b_vp == NULL) || (VOP_ISLOCKED(b_vp) != LK_EXCLUSIVE)); */
+	return error;
+}
+
+/*
+ * Shuffle the directory entries to move fvp from the directory fdvp
+ * into the directory tdvp. fde is fvp's directory entry in fdvp. If
+ * we are overwriting a target node, it is tvp, and tde is its
+ * directory entry in tdvp.
+ *
+ * fdvp, fvp, tdvp, and tvp must all be locked and referenced.
+ *
+ * tde and tvp must be both NULL or both non-NULL. tmpfs is used only
+ * to free the replaced target entry. Nothing in this function changes
+ * an entry's name. There is no return value.
+ */
+static void
+tmpfs_rename_attachdetach(struct tmpfs_mount *tmpfs,
+    struct vnode *fdvp, struct tmpfs_dirent *fde, struct vnode *fvp,
+    struct vnode *tdvp, struct tmpfs_dirent *tde, struct vnode *tvp)
+{
+
+	KASSERT(tmpfs != NULL);
+	KASSERT(fdvp != NULL);
+	KASSERT(fde != NULL);
+	KASSERT(fvp != NULL);
+	KASSERT(tdvp != NULL);
+	KASSERT(fde->td_node != NULL);
+	KASSERT(fde->td_node->tn_vnode == fvp);
+	KASSERT((tde == NULL) == (tvp == NULL));
+	KASSERT((tde == NULL) || (tde->td_node != NULL));
+	KASSERT((tde == NULL) || (tde->td_node->tn_vnode == tvp));
+	KASSERT(VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE);
+	KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
+	KASSERT(VOP_ISLOCKED(fvp) == LK_EXCLUSIVE);
+	KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE));
+
+	/*
+	 * If we are moving from one directory to another, detach the
+	 * source entry and reattach it to the target directory. When
+	 * both directories are the same vnode, fde stays attached
+	 * where it is.
+	 */
 	if (fdvp != tdvp) {
-		VOP_UNLOCK(fdvp);
+		/* tmpfs_dir_detach clobbers fde->td_node, so save it. */
+		struct tmpfs_node *fnode = fde->td_node;
+		tmpfs_dir_detach(fdvp, fde);
+		tmpfs_dir_attach(tdvp, fde, fnode);
+	} else if (tvp == NULL) {
+		/*
+		 * We are changing the directory. tmpfs_dir_attach and
+		 * tmpfs_dir_detach note the events for us, but for
+		 * this case we don't call them, so we must note the
+		 * event explicitly.
+		 */
+		VN_KNOTE(fdvp, NOTE_WRITE);
 	}
-	if (fvp != tvp) {
-		VOP_UNLOCK(fvp);
+
+	/*
+	 * If we are replacing an existing target entry, delete it.
+	 * The source and the target must be both directories or both
+	 * non-directories. NOTE(review): the size and link-count
+	 * assertions below presumably require that a replaced
+	 * directory be empty -- confirm against the callers.
+	 */
+	if (tde != NULL) {
+		KASSERT(tvp != NULL);
+		KASSERT(tde->td_node != NULL);
+		KASSERT((fvp->v_type == VDIR) == (tvp->v_type == VDIR));
+		if (tde->td_node->tn_type == VDIR) {
+			KASSERT(tde->td_node->tn_size == 0);
+			KASSERT(tde->td_node->tn_links == 2);
+			/* Decrement the extra link count for `.' so
+			 * the vnode will be recycled when released. */
+			tde->td_node->tn_links--;
+		}
+		tmpfs_dir_detach(tdvp, tde);
+		tmpfs_free_dirent(tmpfs, tde);
 	}
-out_unlocked:
-	/* Release target nodes. */
-	if (tdvp == tvp) {
-		vrele(tdvp);
-	} else {
-		vput(tdvp);
+}
+
+/*
+ * Remove the entry de for the non-directory vp from the directory dvp.
+ *
+ * Everything must be locked and referenced.
+ *
+ * dnode must be dvp's node, and de->td_node must be vp's node.
+ * Returns 0 after detaching and freeing de. Otherwise returns the
+ * error from tmpfs_remove_check_possible or from kauth_authorize_vnode,
+ * and de is left untouched.
+ *
+ * The result of tmpfs_remove_check_permitted is not tested directly:
+ * it is handed to kauth_authorize_vnode as its last argument, and only
+ * the value kauth returns is acted on.
+ */
+static int
+tmpfs_do_remove(struct tmpfs_mount *tmpfs, struct vnode *dvp,
+    struct tmpfs_node *dnode, struct tmpfs_dirent *de, struct vnode *vp,
+    kauth_cred_t cred)
+{
+	int error;
+
+	KASSERT(tmpfs != NULL);
+	KASSERT(dvp != NULL);
+	KASSERT(dnode != NULL);
+	KASSERT(de != NULL);
+	KASSERT(vp != NULL);
+	KASSERT(dnode->tn_vnode == dvp);
+	KASSERT(de->td_node != NULL);
+	KASSERT(de->td_node->tn_vnode == vp);
+	KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE);
+	KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE);
+
+	error = tmpfs_remove_check_possible(dnode, de->td_node);
+	if (error)
+		return error;
+
+	error = tmpfs_remove_check_permitted(cred, dnode, de->td_node);
+	error = kauth_authorize_vnode(cred, KAUTH_VNODE_DELETE, vp, dvp,
+	    error);
+	if (error)
+		return error;
+
+	tmpfs_dir_detach(dvp, de);
+	tmpfs_free_dirent(tmpfs, de);
+
+	return 0;
+}
+
+/*
+ * Check whether a rename is possible independent of credentials.
+ *
+ * Everything must be locked and referenced.
+ */
+/*
+ * Yields 0 when no file flag stands in the way of the rename, and
+ * EPERM otherwise. tnode is NULL when no target entry exists.
+ */
+static int
+tmpfs_rename_check_possible(
+    struct tmpfs_node *fdnode, struct tmpfs_node *fnode,
+    struct tmpfs_node *tdnode, struct tmpfs_node *tnode)
+{
+
+	/* The three mandatory nodes are present and pairwise sane. */
+	KASSERT(fdnode != NULL);
+	KASSERT(fnode != NULL);
+	KASSERT(tdnode != NULL);
+	KASSERT(fdnode != fnode);
+	KASSERT(tdnode != tnode);
+	KASSERT(fnode != tnode);
+	/* Every node handed in is backed by a vnode... */
+	KASSERT(fdnode->tn_vnode != NULL);
+	KASSERT(fnode->tn_vnode != NULL);
+	KASSERT(tdnode->tn_vnode != NULL);
+	KASSERT((tnode == NULL) || (tnode->tn_vnode != NULL));
+	/* ...and that vnode is held exclusively. */
+	KASSERT(VOP_ISLOCKED(fdnode->tn_vnode) == LK_EXCLUSIVE);
+	KASSERT(VOP_ISLOCKED(fnode->tn_vnode) == LK_EXCLUSIVE);
+	KASSERT(VOP_ISLOCKED(tdnode->tn_vnode) == LK_EXCLUSIVE);
+	KASSERT((tnode == NULL) ||
+	    (VOP_ISLOCKED(tnode->tn_vnode) == LK_EXCLUSIVE));
+
+	/*
+	 * Source side. An immutable fdnode cannot be written at all,
+	 * and an append-only fdnode may only gain entries. The links
+	 * to an immutable fnode cannot change; an append-only fnode
+	 * is refused too, which is what UFS does.
+	 */
+	if (((fdnode->tn_flags | fnode->tn_flags) &
+	    (IMMUTABLE | APPEND)) != 0)
+		return EPERM;
+
+	/*
+	 * Target side. An immutable tdnode cannot be written. When
+	 * there is no existing target entry we only add one, which an
+	 * append-only tdnode still allows. When there is one we would
+	 * change an existing entry, so an append-only tdnode forbids
+	 * it; the links to an immutable tnode cannot be replaced, and
+	 * an append-only tnode is refused as well, as in UFS.
+	 */
+	if (tnode == NULL) {
+		if ((tdnode->tn_flags & IMMUTABLE) != 0)
+			return EPERM;
+	} else {
+		if ((tdnode->tn_flags & (IMMUTABLE | APPEND)) != 0)
+			return EPERM;
+		KASSERT(tnode != NULL);
+		if ((tnode->tn_flags & (IMMUTABLE | APPEND)) != 0)
+			return EPERM;
 	}
-	if (tvp) {
-		vput(tvp);
+
+	return 0;
+}
+
+/*
+ * Check whether a rename is permitted given our credentials.
+ *
+ * Everything must be locked and referenced.
+ */
+static int
+tmpfs_rename_check_permitted(kauth_cred_t cred,
+    struct tmpfs_node *fdnode, struct tmpfs_node *fnode,
+    struct tmpfs_node *tdnode, struct tmpfs_node *tnode)
+{
+	int error;
+
+	KASSERT(fdnode != NULL);
+	KASSERT(fnode != NULL);
+	KASSERT(tdnode != NULL);
+	KASSERT(fdnode != fnode);
+	KASSERT(tdnode != tnode);
+	KASSERT(fnode != tnode);
+	KASSERT(fdnode->tn_vnode != NULL);
+	KASSERT(fnode->tn_vnode != NULL);
+	KASSERT(tdnode->tn_vnode != NULL);
+	KASSERT((tnode == NULL) || (tnode->tn_vnode != NULL));
+	KASSERT(VOP_ISLOCKED(fdnode->tn_vnode) == LK_EXCLUSIVE);
+	KASSERT(VOP_ISLOCKED(fnode->tn_vnode) == LK_EXCLUSIVE);
+	KASSERT(VOP_ISLOCKED(tdnode->tn_vnode) == LK_EXCLUSIVE);
+	KASSERT((tnode == NULL) ||
+	    (VOP_ISLOCKED(tnode->tn_vnode) == LK_EXCLUSIVE));
+
+	/*
+	 * We need to remove or change an entry in the source directory.
+	 *
+	 * Every check in this function returns its error as soon as it
+	 * fails. 0 is returned only when the access checks here and
+	 * the two tmpfs_check_sticky calls at the end all succeed.
+	 * Those calls cover the source entry first and then the target
+	 * entry, for which tnode may be NULL.
+	 */
+	error = VOP_ACCESS(fdnode->tn_vnode, VWRITE, cred);
+	if (error)
+		return error;
+
+	/*
+	 * If we are changing directories, then we need to write to the
+	 * target directory to add or change an entry. Also, if fnode
+	 * is a directory, we need to write to it to change its `..'
+	 * entry. When the source and target directories are the same
+	 * node, the write check on fdnode above already covers it.
+	 */
+	if (fdnode != tdnode) {
+		error = VOP_ACCESS(tdnode->tn_vnode, VWRITE, cred);
+		if (error)
+			return error;
+		if (fnode->tn_type == VDIR) {
+			error = VOP_ACCESS(fnode->tn_vnode, VWRITE, cred);
+			if (error)
+				return error;
+		}
 	}

-	/* Release source nodes. */
-	vrele(fdvp);
-	vrele(fvp);
+	error = tmpfs_check_sticky(cred, fdnode, fnode);
+	if (error)
+		return error;

-	if (newname != NULL) {
-		tmpfs_strname_free(tmp, newname, namelen);
+	error = tmpfs_check_sticky(cred, tdnode, tnode);
+	if (error)
+		return error;
+
+	return 0;
+}
+
+/*
+ * Check whether removing node's entry in dnode is possible independent
+ * of credentials.
+ *
+ * Everything must be locked and referenced.
+ */
+/* Yields 0 if the entry may be removed, EPERM if a file flag forbids it. */
+static int
+tmpfs_remove_check_possible(struct tmpfs_node *dnode, struct tmpfs_node *node)
+{
+
+	/* Both nodes are required, distinct, and exclusively locked. */
+	KASSERT(dnode != NULL);
+	KASSERT(dnode->tn_vnode != NULL);
+	KASSERT(node != NULL);
+	KASSERT(dnode != node);
+	KASSERT(VOP_ISLOCKED(dnode->tn_vnode) == LK_EXCLUSIVE);
+	KASSERT(VOP_ISLOCKED(node->tn_vnode) == LK_EXCLUSIVE);
+
+	/*
+	 * Deleting the entry writes to dnode and changes the links to
+	 * node. An immutable dnode cannot be written, and an
+	 * append-only dnode may only gain entries, never lose them.
+	 * The links to an immutable node cannot be changed. An
+	 * append-only node is refused as well, which is what UFS does.
+	 */
+	return (((dnode->tn_flags | node->tn_flags) &
+	    (IMMUTABLE | APPEND)) != 0) ? EPERM : 0;
+}
+
+/*
+ * Decide whether the given credentials allow node's entry to be
+ * removed from dnode. Yields 0 when they do, otherwise the error
+ * from the write-access check or, failing that, the sticky-bit check.
+ *
+ * All nodes involved must be locked and referenced.
+ */
+static int
+tmpfs_remove_check_permitted(kauth_cred_t cred,
+    struct tmpfs_node *dnode, struct tmpfs_node *node)
+{
+	int error;
+
+	KASSERT(dnode != NULL);
+	KASSERT(dnode->tn_vnode != NULL);
+	KASSERT(node != NULL);
+	KASSERT(dnode != node);
+	KASSERT(VOP_ISLOCKED(dnode->tn_vnode) == LK_EXCLUSIVE);
+	KASSERT(VOP_ISLOCKED(node->tn_vnode) == LK_EXCLUSIVE);
+
+	/* Deleting an entry requires write access to the directory... */
+	error = VOP_ACCESS(dnode->tn_vnode, VWRITE, cred);
+
+	/* ...and, once that is granted, passing the sticky-bit check. */
+	if (error == 0)
+		error = tmpfs_check_sticky(cred, dnode, node);
+
+	return error;
+}
+
+/*
+ * Check whether we may change an entry in a sticky directory. If the
+ * directory is sticky, the user must own either the directory or, if
+ * it exists, the node, in order to change the entry.
+ *
+ * Everything must be locked and referenced.
+ */
+/*
+ * cred supplies the effective uid that is compared against the owners.
+ * dnode is the directory holding the entry. node is the entry's node,
+ * or NULL when no entry exists, in which case the sticky bit imposes
+ * no restriction.
+ *
+ * Returns 0 if the change is allowed, or EPERM if dnode is sticky and
+ * the effective uid owns neither dnode nor node.
+ */
+static int
+tmpfs_check_sticky(kauth_cred_t cred,
+    struct tmpfs_node *dnode, struct tmpfs_node *node)
+{
+
+	KASSERT(dnode != NULL);
+	KASSERT(dnode->tn_vnode != NULL);
+	KASSERT(VOP_ISLOCKED(dnode->tn_vnode) == LK_EXCLUSIVE);
+	KASSERT((node == NULL) || (node->tn_vnode != NULL));
+	/* When node is present, its own vnode must be locked as well. */
+	KASSERT((node == NULL) ||
+	    (VOP_ISLOCKED(node->tn_vnode) == LK_EXCLUSIVE));
+
+	if (dnode->tn_mode & S_ISTXT) {
+		uid_t euid = kauth_cred_geteuid(cred);
+		/* The owner of the directory may change any entry. */
+		if (euid == dnode->tn_uid)
+			return 0;
+		/* With no node, or as the node's owner, it is allowed. */
+		if ((node == NULL) || (euid == node->tn_uid))
+			return 0;
+		return EPERM;
 	}
-	return error;
+
+	return 0;
 }

 int