Module Name: src
Committed By: riastradh
Date: Mon Oct 15 23:08:20 UTC 2012
Modified Files:
src/external/cddl/osnet/dist/uts/common/fs/zfs: zfs_vfsops.c
zfs_vnops.c zfs_znode.c
Log Message:
Fix various issues in zfs life cycle, locking, and vop protocol.
- Restore some zfs locking and unlocking that got lost randomly.
- Enable use of the BSD vnode lock. Lock order: all BSD vnode locks
are taken before all zfs internal locks. There remains an issue with
O_EXCL, to be solved later (famous last words). KASSERT the locking
scheme up the wazoo.
- Take our cruft out of zfs_lookup and move it to zfs_netbsd_lookup.
Restore much of the way zfs_lookup looked to make merging future
versions easier. Disable use of the namecache for now because its
locking dance is too scary to contemplate.
- Implement BSD semantics for rename, to appease our tests. This is
a provisional kludge; eventually we need VOP_RENAME to take a flag
specifying whether to use BSD semantics or POSIX semantics.
- Simplify zfs_netbsd_reclaim and make it work. Now that getnewvnode
never tries to vclean anything itself, we need not worry about
recursion of ZFS_OBJ_MUTEX locks.
- Clarify and fix genfs node initialization and destruction.
zfs passes most of our atf vfs tests now, including the rename races.
Still to do:
- fix the impedance mismatch between our permissions model and zfs's;
- fix O_EXCL (nontrivial);
- throw dirconc at it and see how badly it explodes;
- find why zpool sometimes wedges itself during mkfs; and
- find why pool caches sometimes seem to get corrupted.
To generate a diff of this commit:
cvs rdiff -u -r1.7 -r1.8 \
src/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_vfsops.c
cvs rdiff -u -r1.11 -r1.12 \
src/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_vnops.c
cvs rdiff -u -r1.12 -r1.13 \
src/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_znode.c
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_vfsops.c
diff -u src/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_vfsops.c:1.7 src/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_vfsops.c:1.8
--- src/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_vfsops.c:1.7 Sun Nov 20 02:54:25 2011
+++ src/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_vfsops.c Mon Oct 15 23:08:19 2012
@@ -1811,6 +1811,10 @@ zfs_root(vfs_t *vfsp, vnode_t **vpp)
*vpp = ZTOV(rootzp);
dprintf("vpp -> %d, error %d -- %p\n", (*vpp)->v_type, error, *vpp);
ZFS_EXIT(zfsvfs);
+ if (error == 0)
+ vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY);
+ KASSERT((error != 0) || (*vpp != NULL));
+ KASSERT((error != 0) || (VOP_ISLOCKED(*vpp) == LK_EXCLUSIVE));
return (error);
}
Index: src/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_vnops.c
diff -u src/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_vnops.c:1.11 src/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_vnops.c:1.12
--- src/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_vnops.c:1.11 Mon Oct 1 18:19:18 2012
+++ src/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_vnops.c Mon Oct 15 23:08:19 2012
@@ -1197,13 +1197,12 @@ specvp_check(vnode_t **vpp, cred_t *cr)
*/
/* ARGSUSED */
static int
-zfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct componentname *cnp,
- int nameiop, cred_t *cr, int flags)
+zfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct pathname *pnp,
+ int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
+ int *direntflags, pathname_t *realpnp)
{
znode_t *zdp = VTOZ(dvp);
zfsvfs_t *zfsvfs = zdp->z_zfsvfs;
- int *direntflags = NULL;
- void *realpnp = NULL;
int error = 0;
/* fast path */
@@ -1249,7 +1248,7 @@ zfs_lookup(vnode_t *dvp, char *nm, vnode
ZFS_VERIFY_ZP(zdp);
*vpp = NULL;
- dprintf("zfs_lookup called %s\n", nm);
+
if (flags & LOOKUP_XATTR) {
#ifdef TODO
/*
@@ -1301,17 +1300,6 @@ zfs_lookup(vnode_t *dvp, char *nm, vnode
return (error);
}
- /*
- * Before tediously performing a linear scan of the directory,
- * check the name cache to see if the directory/name pair
- * we are looking for is known already.
- */
-
- if ((error = cache_lookup(dvp, vpp, cnp)) >= 0) {
- ZFS_EXIT(zfsvfs);
- return (error);
- }
-
if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm),
NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
ZFS_EXIT(zfsvfs);
@@ -1323,56 +1311,6 @@ zfs_lookup(vnode_t *dvp, char *nm, vnode
error = specvp_check(vpp, cr);
ZFS_EXIT(zfsvfs);
-
- /* Translate errors and add SAVENAME when needed. */
- if (cnp->cn_flags & ISLASTCN) {
- switch (nameiop) {
- case CREATE:
- case RENAME:
- if (error == ENOENT) {
- error = EJUSTRETURN;
- break;
- }
- /* FALLTHROUGH */
- case DELETE:
- break;
- }
- }
-
- if (error == 0 && (nm[0] != '.' || nm[1] != '\0')) {
- int ltype = 0;
-
- if (cnp->cn_flags & ISDOTDOT) {
- ltype = VOP_ISLOCKED(dvp);
- VOP_UNLOCK(dvp);
- }
- error = vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY);
- if (cnp->cn_flags & ISDOTDOT)
- vn_lock(dvp, ltype | LK_RETRY);
- if (error != 0) {
- VN_RELE(*vpp);
- *vpp = NULL;
- return (error);
- }
- }
-
- /*
- * Insert name into cache if appropriate.
- */
- if ((cnp->cn_flags & MAKEENTRY) == 0){
- return (error);
- }
- switch (error) {
- case 0:
- cache_enter(dvp, *vpp, cnp);
- break;
- case ENOENT:
- if (nameiop != CREATE)
- cache_enter(dvp, *vpp, cnp);
- break;
- default:
- break;
- }
return (error);
}
@@ -2033,6 +1971,12 @@ top:
vnevent_rmdir(vp, dvp, name, ct);
/*
+ * Grab a lock on the directory to make sure that noone is
+ * trying to add (or lookup) entries while we are removing it.
+ */
+ rw_enter(&zp->z_name_lock, RW_WRITER);
+
+ /*
* Grab a lock on the parent pointer to make sure we play well
* with the treewalk and directory rename code.
*/
@@ -3571,10 +3515,20 @@ top:
* entries refer to the same file object, rename
* must do nothing and exit without error.
*/
+#ifndef __NetBSD__
+ /*
+ * But on NetBSD we have a different system call to do
+ * this, posix_rename, which sorta kinda handles this
+ * case (modulo races), and our tests expect BSD
+ * semantics for rename, so we'll do that until we can
+ * push the choice between BSD and POSIX semantics into
+ * the VOP_RENAME protocol as a flag.
+ */
if (szp->z_id == tzp->z_id) {
error = 0;
goto out;
}
+#endif
}
vnevent_rename_src(ZTOV(szp), sdvp, snm, ct);
@@ -3623,15 +3577,20 @@ top:
return (error);
}
- if (tzp) /* Attempt to remove the existing target */
+ if (tzp && (tzp->z_id != szp->z_id))
+ /* Attempt to remove the existing target */
error = zfs_link_destroy(tdl, tzp, tx, zflg, NULL);
if (error == 0) {
- error = zfs_link_create(tdl, szp, tx, ZRENAMING);
+ if (!tzp || (tzp->z_id != szp->z_id))
+ error = zfs_link_create(tdl, szp, tx, ZRENAMING);
if (error == 0) {
szp->z_phys->zp_flags |= ZFS_AV_MODIFIED;
- error = zfs_link_destroy(sdl, szp, tx, ZRENAMING, NULL);
+ error = zfs_link_destroy(sdl, szp, tx,
+ /* Kludge for BSD rename semantics. */
+ ((tzp && (tzp->z_id == szp->z_id)) ?
+ zflg : ZRENAMING), NULL);
ASSERT(error == 0);
zfs_log_rename(zilog, tx,
@@ -4779,13 +4738,8 @@ static int
zfs_netbsd_open(void *v)
{
struct vop_open_args *ap = v;
- vnode_t *vp = ap->a_vp;
- znode_t *zp = VTOZ(vp);
- int error;
- error = zfs_open(&vp, ap->a_mode, ap->a_cred, NULL);
-
- return (error);
+ return (zfs_open(&ap->a_vp, ap->a_mode, ap->a_cred, NULL));
}
static int
@@ -4846,63 +4800,346 @@ zfs_netbsd_access(void *v)
static int
zfs_netbsd_lookup(void *v)
{
- struct vop_lookup_args *ap = v;
+ struct vop_lookup_args /* {
+ struct vnode *a_dvp;
+ struct vnode **a_vpp;
+ struct componentname *a_cnp;
+ } */ *ap = v;
+ struct vnode *dvp = ap->a_dvp;
+ struct vnode **vpp = ap->a_vpp;
struct componentname *cnp = ap->a_cnp;
char nm[NAME_MAX + 1];
- int err;
-
- ASSERT(cnp->cn_namelen < sizeof(nm));
- strlcpy(nm, cnp->cn_nameptr, MIN(cnp->cn_namelen + 1, sizeof(nm)));
+ int error;
- err = zfs_lookup(ap->a_dvp, nm, ap->a_vpp, cnp, cnp->cn_nameiop,
- cnp->cn_cred, 0);
-
- return err;
+ KASSERT(dvp != NULL);
+ KASSERT(vpp != NULL);
+ KASSERT(cnp != NULL);
+ KASSERT(cnp->cn_nameptr != NULL);
+ KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE);
+ KASSERT(cnp->cn_namelen < sizeof nm);
+
+#if 0 /* Namecache too scary to contemplate. */
+ *vpp = NULL;
+
+ /*
+ * Do an access check before the cache lookup. zfs_lookup does
+ * an access check too, but it's too scary to contemplate
+ * injecting our namecache stuff into zfs internals.
+ *
+ * XXX Is this the correct access check?
+ */
+ if ((error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred)) != 0)
+ goto out;
+
+ /*
+ * Check the namecache before entering zfs_lookup.
+ * cache_lookup does the locking dance for us.
+ */
+ if ((error = cache_lookup(dvp, vpp, cnp)) >= 0)
+ goto out;
+#endif
+
+ /*
+ * zfs_lookup wants a null-terminated component name, but namei
+ * gives us a pointer into the full pathname.
+ */
+ (void)strlcpy(nm, cnp->cn_nameptr, cnp->cn_namelen + 1);
+
+ error = zfs_lookup(dvp, nm, vpp, NULL, 0, NULL, cnp->cn_cred, NULL,
+ NULL, NULL);
+
+ /*
+ * Translate errors to match our namei insanity.
+ */
+ if (cnp->cn_flags & ISLASTCN) {
+ switch (cnp->cn_nameiop) {
+ case CREATE:
+ case RENAME:
+ if (error == ENOENT) {
+ error = EJUSTRETURN;
+ break;
+ }
+ /* FALLTHROUGH */
+ case DELETE:
+ break;
+ }
+ }
+
+ if (error) {
+ KASSERT(*vpp == NULL);
+ goto out;
+ }
+ KASSERT(*vpp != NULL); /* XXX Correct? */
+
+ /*
+ * Do a locking dance in conformance to the VOP_LOOKUP protocol.
+ */
+ if ((cnp->cn_namelen == 1) && (cnp->cn_nameptr[0] == '.')) {
+ KASSERT(!(cnp->cn_flags & ISDOTDOT));
+ KASSERT(dvp == *vpp);
+ } else if ((cnp->cn_namelen == 2) &&
+ (cnp->cn_nameptr[0] == '.') &&
+ (cnp->cn_nameptr[1] == '.')) {
+ KASSERT(cnp->cn_flags & ISDOTDOT);
+ VOP_UNLOCK(dvp);
+ vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY);
+ vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
+ } else {
+ KASSERT(!(cnp->cn_flags & ISDOTDOT));
+ vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY);
+ }
+
+out:
+ KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE);
+ KASSERT((*vpp == NULL) || (VOP_ISLOCKED(*vpp) == LK_EXCLUSIVE));
+
+#if 0 /* Namecache too scary to contemplate. */
+ /*
+ * Insert name into cache if appropriate.
+ *
+ * XXX This seems like a lot of code. Is it all necessary?
+ * Can we just do a single cache_enter call?
+ */
+#if 0
+ cache_enter(dvp, *vpp, cnp);
+#endif
+
+ if ((cnp->cn_flags & MAKEENTRY) == 0)
+ return (error);
+
+ switch (error) {
+ case 0:
+ cache_enter(dvp, *vpp, cnp);
+ break;
+ case ENOENT:
+ KASSERT(*vpp == NULL);
+ if (cnp->cn_nameiop != CREATE)
+ cache_enter(dvp, NULL, cnp);
+ break;
+ default:
+ break;
+ }
+#endif
+
+ return (error);
}
static int
zfs_netbsd_create(void *v)
{
- struct vop_create_args *ap = v;
+ struct vop_create_args /* {
+ struct vnode *a_dvp;
+ struct vnode **a_vpp;
+ struct componentname *a_cnp;
+ struct vattr *a_vap;
+ } */ *ap = v;
+ struct vnode *dvp = ap->a_dvp;
+ struct vnode **vpp = ap->a_vpp;
struct componentname *cnp = ap->a_cnp;
- vattr_t *vap = ap->a_vap;
+ struct vattr *vap = ap->a_vap;
int mode;
+ int error;
+
+ KASSERT(dvp != NULL);
+ KASSERT(vpp != NULL);
+ KASSERT(cnp != NULL);
+ KASSERT(vap != NULL);
+ KASSERT(cnp->cn_nameptr != NULL);
+ KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE);
+
+#if 0
+ /*
+ * ZFS doesn't require dvp to be BSD-locked, and the caller
+ * expects us to drop this lock anyway, so we might as well
+ * drop it early to encourage concurrency.
+ */
+ VOP_UNLOCK(dvp);
+#endif
vattr_init_mask(vap);
mode = vap->va_mode & ALLPERMS;
- return (zfs_create(ap->a_dvp, (char *)cnp->cn_nameptr, vap, !EXCL, mode,
- ap->a_vpp, cnp->cn_cred));
+ /* XXX !EXCL is wrong here... */
+ error = zfs_create(dvp, __UNCONST(cnp->cn_nameptr), vap, !EXCL, mode,
+ vpp, cnp->cn_cred);
+ if (error) {
+ KASSERT(*vpp == NULL);
+ goto out;
+ }
+ KASSERT(*vpp != NULL);
+
+ /*
+ * Lock *vpp in conformance to the VOP_CREATE protocol.
+ */
+ vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY);
+
+out:
+ KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE);
+ KASSERT((*vpp == NULL) || (VOP_ISLOCKED(*vpp) == LK_EXCLUSIVE));
+
+ /*
+ * Unlock and release dvp because the VOP_CREATE protocol is insane.
+ */
+ VOP_UNLOCK(dvp);
+ VN_RELE(dvp);
+
+ return (error);
}
static int
zfs_netbsd_remove(void *v)
{
- struct vop_remove_args *ap = v;
+ struct vop_remove_args /* {
+ struct vnode *a_dvp;
+ struct vnode *a_vp;
+ struct componentname *a_cnp;
+ } */ *ap = v;
+ struct vnode *dvp = ap->a_dvp;
+ struct vnode *vp = ap->a_vp;
+ struct componentname *cnp = ap->a_cnp;
+ int error;
+
+ KASSERT(dvp != NULL);
+ KASSERT(vp != NULL);
+ KASSERT(cnp != NULL);
+ KASSERT(cnp->cn_nameptr != NULL);
+ KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE);
+ KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE);
+
+#if 0
+ /*
+ * ZFS doesn't require dvp to be BSD-locked, and the caller
+ * expects us to drop this lock anyway, so we might as well
+ * drop it early to encourage concurrency.
+ */
+ VOP_UNLOCK(dvp);
+#endif
+
+ /*
+ * zfs_remove will look up the entry again itself, so discard vp.
+ */
+ VOP_UNLOCK(vp);
+ VN_RELE(vp);
+
+ error = zfs_remove(dvp, __UNCONST(cnp->cn_nameptr), cnp->cn_cred, NULL,
+ 0);
+
+ KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE);
- return (zfs_remove(ap->a_dvp, (char *)ap->a_cnp->cn_nameptr,
- ap->a_cnp->cn_cred, NULL, 0));
+ /*
+ * Unlock and release dvp because the VOP_REMOVE protocol is insane.
+ */
+ VOP_UNLOCK(dvp);
+ VN_RELE(dvp);
+
+ return (error);
}
static int
zfs_netbsd_mkdir(void *v)
{
- struct vop_mkdir_args *ap = v;
- vattr_t *vap = ap->a_vap;
+ struct vop_mkdir_args /* {
+ struct vnode *a_dvp;
+ struct vnode **a_vpp;
+ struct componentname *a_cnp;
+ struct vattr *a_vap;
+ } */ *ap = v;
+ struct vnode *dvp = ap->a_dvp;
+ struct vnode **vpp = ap->a_vpp;
+ struct componentname *cnp = ap->a_cnp;
+ struct vattr *vap = ap->a_vap;
+ int error;
+
+ KASSERT(dvp != NULL);
+ KASSERT(vpp != NULL);
+ KASSERT(cnp != NULL);
+ KASSERT(vap != NULL);
+ KASSERT(cnp->cn_nameptr != NULL);
+ KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE);
+
+#if 0
+ /*
+ * ZFS doesn't require dvp to be BSD-locked, and the caller
+ * expects us to drop this lock anyway, so we might as well
+ * drop it early to encourage concurrency.
+ */
+ VOP_UNLOCK(dvp);
+#endif
vattr_init_mask(vap);
- return (zfs_mkdir(ap->a_dvp, (char *)ap->a_cnp->cn_nameptr, vap, ap->a_vpp,
- ap->a_cnp->cn_cred, NULL, 0, NULL));
+ error = zfs_mkdir(dvp, __UNCONST(cnp->cn_nameptr), vap, vpp,
+ cnp->cn_cred, NULL, 0, NULL);
+ if (error) {
+ KASSERT(*vpp == NULL);
+ goto out;
+ }
+ KASSERT(*vpp != NULL);
+
+ /*
+ * Lock *vpp in conformance to the VOP_MKDIR protocol.
+ */
+ vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY);
+
+out:
+ KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE);
+ KASSERT((*vpp == NULL) || (VOP_ISLOCKED(*vpp) == LK_EXCLUSIVE));
+
+ /*
+ * Unlock and release dvp because the VOP_MKDIR protocol is insane.
+ */
+ VOP_UNLOCK(dvp);
+ VN_RELE(dvp);
+
+ return (error);
}
static int
zfs_netbsd_rmdir(void *v)
{
- struct vop_rmdir_args *ap = v;
+ struct vop_rmdir_args /* {
+ struct vnode *a_dvp;
+ struct vnode *a_vp;
+ struct componentname *a_cnp;
+ } */ *ap = v;
+ struct vnode *dvp = ap->a_dvp;
+ struct vnode *vp = ap->a_vp;
struct componentname *cnp = ap->a_cnp;
+ int error;
+
+ KASSERT(dvp != NULL);
+ KASSERT(vp != NULL);
+ KASSERT(cnp != NULL);
+ KASSERT(cnp->cn_nameptr != NULL);
+ KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE);
+ KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE);
+
+#if 0
+ /*
+ * ZFS doesn't require dvp to be BSD-locked, and the caller
+ * expects us to drop this lock anyway, so we might as well
+ * drop it early to encourage concurrency.
+ */
+ VOP_UNLOCK(dvp);
+#endif
+
+ /*
+ * zfs_rmdir will look up the entry again itself, so discard vp.
+ */
+ VOP_UNLOCK(vp);
+ VN_RELE(vp);
- return (zfs_rmdir(ap->a_dvp, (char *)cnp->cn_nameptr, NULL, cnp->cn_cred, NULL, 0));
+ error = zfs_rmdir(dvp, __UNCONST(cnp->cn_nameptr), NULL, cnp->cn_cred,
+ NULL, 0);
+
+ KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE);
+
+ /*
+ * Unlock and release dvp because the VOP_RMDIR protocol is insane.
+ */
+ VOP_UNLOCK(dvp);
+ VN_RELE(dvp);
+ return error;
}
static int
@@ -5053,21 +5290,63 @@ zfs_netbsd_rename(void *v)
} */ *ap = v;
vnode_t *fdvp = ap->a_fdvp;
vnode_t *fvp = ap->a_fvp;
+ struct componentname *fcnp = ap->a_fcnp;
vnode_t *tdvp = ap->a_tdvp;
vnode_t *tvp = ap->a_tvp;
+ struct componentname *tcnp = ap->a_tcnp;
+ kauth_cred_t cred;
int error;
- error = zfs_rename(fdvp, (char *)ap->a_fcnp->cn_nameptr, tdvp,
- (char *)ap->a_tcnp->cn_nameptr, ap->a_fcnp->cn_cred, NULL, 0);
+ KASSERT(fdvp != NULL);
+ KASSERT(fvp != NULL);
+ KASSERT(fcnp != NULL);
+ KASSERT(fcnp->cn_nameptr != NULL);
+ KASSERT(tdvp != NULL);
+ KASSERT(tcnp != NULL);
+ KASSERT(fcnp->cn_nameptr != NULL);
+ /* KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */
+ /* KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */
+ KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
+ KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE));
+ KASSERT(fdvp->v_type == VDIR);
+ KASSERT(tdvp->v_type == VDIR);
- if (tdvp == tvp)
- VN_RELE(tdvp);
- else
- VN_URELE(tdvp);
- if (tvp)
- VN_URELE(tvp);
- VN_RELE(fdvp);
+ cred = fcnp->cn_cred;
+
+ /*
+ * XXX Want a better equality test. `tcnp->cn_cred == cred'
+ * hoses p2k because puffs transmits the creds separately and
+ * allocates distinct but equivalent structures for them.
+ */
+ KASSERT(kauth_cred_uidmatch(cred, tcnp->cn_cred));
+
+ /*
+ * Drop the insane locks.
+ */
+ VOP_UNLOCK(tdvp);
+ if ((tvp != NULL) && (tvp != tdvp))
+ VOP_UNLOCK(tvp);
+
+ /*
+ * Release the source and target nodes; zfs_rename will look
+ * them up again once the locking situation is sane.
+ */
VN_RELE(fvp);
+ if (tvp != NULL)
+ VN_RELE(tvp);
+
+ /*
+ * Do the rename ZFSly.
+ */
+ error = zfs_rename(fdvp, __UNCONST(fcnp->cn_nameptr), tdvp,
+ __UNCONST(tcnp->cn_nameptr), cred, NULL, 0);
+
+ /*
+ * Release the directories now too, because the VOP_RENAME
+ * protocol is insane.
+ */
+ VN_RELE(fdvp);
+ VN_RELE(tdvp);
return (error);
}
@@ -5075,15 +5354,65 @@ zfs_netbsd_rename(void *v)
static int
zfs_netbsd_symlink(void *v)
{
- struct vop_symlink_args *ap = v;
+ struct vop_symlink_args /* {
+ struct vnode *a_dvp;
+ struct vnode **a_vpp;
+ struct componentname *a_cnp;
+ struct vattr *a_vap;
+ char *a_target;
+ } */ *ap = v;
+ struct vnode *dvp = ap->a_dvp;
+ struct vnode **vpp = ap->a_vpp;
struct componentname *cnp = ap->a_cnp;
- vattr_t *vap = ap->a_vap;
+ struct vattr *vap = ap->a_vap;
+ char *target = ap->a_target;
+ int error;
+
+ KASSERT(dvp != NULL);
+ KASSERT(vpp != NULL);
+ KASSERT(cnp != NULL);
+ KASSERT(vap != NULL);
+ KASSERT(target != NULL);
+ KASSERT(cnp->cn_nameptr != NULL);
+ KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE);
+
+#if 0
+ /*
+ * ZFS doesn't require dvp to be BSD-locked, and the caller
+ * expects us to drop this lock anyway, so we might as well
+ * drop it early to encourage concurrency.
+ */
+ VOP_UNLOCK(dvp);
+#endif
vap->va_type = VLNK; /* Netbsd: Syscall only sets va_mode. */
vattr_init_mask(vap);
- return (zfs_symlink(ap->a_dvp, ap->a_vpp, (char *)cnp->cn_nameptr, vap,
- ap->a_target, cnp->cn_cred, 0));
+ error = zfs_symlink(dvp, vpp, __UNCONST(cnp->cn_nameptr), vap, target,
+ cnp->cn_cred, 0);
+ if (error) {
+ KASSERT(*vpp == NULL);
+ goto out;
+ }
+ KASSERT(*vpp != NULL);
+
+
+ /*
+ * Lock *vpp in conformance to the VOP_SYMLINK protocol.
+ */
+ vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY);
+
+out:
+ KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE);
+ KASSERT((*vpp == NULL) || (VOP_ISLOCKED(*vpp) == LK_EXCLUSIVE));
+
+ /*
+ * Unlock and release dvp because the VOP_SYMLINK protocol is insane.
+ */
+ VOP_UNLOCK(dvp);
+ VN_RELE(dvp);
+
+ return (error);
}
#ifdef PORT_SOLARIS
@@ -5263,10 +5592,41 @@ zfs_netbsd_readlink(void *v)
static int
zfs_netbsd_link(void *v)
{
- struct vop_link_args *ap = v;
+ struct vop_link_args /* {
+ struct vnode *a_dvp;
+ struct vnode *a_vp;
+ struct componentname *a_cnp;
+ } */ *ap = v;
+ struct vnode *dvp = ap->a_dvp;
+ struct vnode *vp = ap->a_vp;
struct componentname *cnp = ap->a_cnp;
+ int error;
+
+ KASSERT(dvp != NULL);
+ KASSERT(vp != NULL);
+ KASSERT(cnp != NULL);
+ KASSERT(cnp->cn_nameptr != NULL);
+ KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE);
- return (zfs_link(ap->a_dvp, ap->a_vp, (char *)cnp->cn_nameptr, cnp->cn_cred, NULL, 0));
+#if 0
+ /*
+ * ZFS doesn't require dvp to be BSD-locked, and the caller
+ * expects us to drop this lock anyway, so we might as well
+ * drop it early to encourage concurrency.
+ */
+ VOP_UNLOCK(dvp);
+#endif
+
+ error = zfs_link(dvp, vp, __UNCONST(cnp->cn_nameptr), cnp->cn_cred,
+ NULL, 0);
+
+ /*
+ * Unlock and release dvp because the VOP_LINK protocol is insane.
+ */
+ VOP_UNLOCK(dvp);
+ VN_RELE(dvp);
+
+ return (error);
}
static int
@@ -5286,91 +5646,67 @@ zfs_netbsd_inactive(void *v)
return (0);
}
-/*
- * Destroy znode from taskq thread without ZFS_OBJ_MUTEX held.
- */
-static void
-zfs_reclaim_deferred(void *arg)
-{
- znode_t *zp = arg;
- zfsvfs_t *zfsvfs = zp->z_zfsvfs;
- uint64_t z_id = zp->z_id;
-
- /*
- * Don't allow a zfs_zget() while were trying to release this znode
- */
- ZFS_OBJ_HOLD_ENTER(zfsvfs, z_id);
-
- /* Don't need to call ZFS_OBJ_HOLD_EXIT zfs_inactive did thatfor us. */
- zfs_zinactive(zp);
-
-}
-
static int
zfs_netbsd_reclaim(void *v)
{
- struct vop_reclaim_args *ap = v;
- vnode_t *vp = ap->a_vp;
- znode_t *zp = VTOZ(vp);
+ struct vop_reclaim_args /* {
+ struct vnode *a_vp;
+ } */ *ap = v;
+ struct vnode *vp = ap->a_vp;
+ znode_t *zp;
zfsvfs_t *zfsvfs;
- int locked;
-
- locked = 0;
-
- ASSERT(zp != NULL);
- KASSERT(!vn_has_cached_data(vp));
+ int error;
+ KASSERT(vp != NULL);
+ zp = VTOZ(vp);
+ KASSERT(zp != NULL);
zfsvfs = zp->z_zfsvfs;
-
- mutex_enter(&zp->z_lock);
- ASSERT(zp->z_phys);
+ KASSERT(zfsvfs != NULL);
-// dprintf("destroying znode %p -- vnode %p -- zp->z_buf = %p\n", zp, ZTOV(zp), zp->z_dbuf);
-// rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER);
- genfs_node_destroy(vp);
- cache_purge(vp);
+ /* Not until we get uvm and zfs talking to one another. */
+ KASSERT(!vn_has_cached_data(vp));
+ rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER);
if (zp->z_dbuf == NULL) {
/*
* The fs has been unmounted, or we did a
* suspend/resume and this file no longer exists.
*/
+ if (vn_has_cached_data(vp))
+ /* zfs and uvm are hosed. Should not happen. */
+ panic("zfs vnode has cached data (0): %p", vp);
rw_exit(&zfsvfs->z_teardown_inactive_lock);
- mutex_exit(&zp->z_lock);
zfs_znode_free(zp);
return (0);
}
- mutex_exit(&zp->z_lock);
- mutex_enter(&zp->z_lock);
- if (!zp->z_unlinked) {
- /*
- * XXX Hack because ZFS_OBJ_MUTEX is held we can't call zfs_zinactive
- * now. I need to defer zfs_zinactive to another thread which doesn't hold this mutex.
- */
- locked = MUTEX_HELD(ZFS_OBJ_MUTEX(zfsvfs, zp->z_id)) ? 2 :
- ZFS_OBJ_HOLD_TRYENTER(zfsvfs, zp->z_id);
- if (locked == 0) {
- /*
- * Lock can't be obtained due to deadlock possibility,
- * so defer znode destruction.
- */
- taskq_dispatch(system_taskq, zfs_reclaim_deferred, zp, 0);
+ /*
+ * Attempt to push any data in the page cache. If this fails
+ * we will get kicked out later in zfs_zinactive().
+ */
+ if (vn_has_cached_data(vp))
+ /* zfs and uvm are hosed. Should not happen. */
+ panic("zfs vnode has cached data (1): %p", vp);
+
+ if (zp->z_atime_dirty && zp->z_unlinked == 0) {
+ dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os);
+
+ dmu_tx_hold_bonus(tx, zp->z_id);
+ error = dmu_tx_assign(tx, TXG_WAIT);
+ if (error) {
+ dmu_tx_abort(tx);
} else {
- zfs_znode_dmu_fini(zp);
- /* Our LWP is holding ZFS_OBJ_HELD mutex but it was locked before
- zfs_zinactive was called therefore we can't release it. */
- if (locked == 1)
- ZFS_OBJ_HOLD_EXIT(zfsvfs, zp->z_id);
- zfs_znode_free(zp);
+ dmu_buf_will_dirty(zp->z_dbuf, tx);
+ mutex_enter(&zp->z_lock);
+ zp->z_atime_dirty = 0;
+ mutex_exit(&zp->z_lock);
+ dmu_tx_commit(tx);
}
- } else
- mutex_exit(&zp->z_lock);
+ }
- ZTOV(zp) = NULL;
- vp->v_data = NULL; /* v_data must be NULL for a cleaned vnode. */
-
- return (0);
+ zfs_zinactive(zp);
+ rw_exit(&zfsvfs->z_teardown_inactive_lock);
+ return 0;
}
static int
@@ -5414,9 +5750,6 @@ zfs_netbsd_pathconf(void *v)
case _PC_CHOWN_RESTRICTED:
*ap->a_retval = 1;
return (0);
- case _PC_NO_TRUNC:
- *ap->a_retval = 1;
- return (0);
case _PC_VDISABLE:
*ap->a_retval = _POSIX_VDISABLE;
return (0);
@@ -5424,10 +5757,15 @@ zfs_netbsd_pathconf(void *v)
return (EINVAL);
}
/* NOTREACHED */
- }
+ }
return (error);
}
+#if 1
+# define zfs_netbsd_lock genfs_lock
+# define zfs_netbsd_unlock genfs_unlock
+# define zfs_netbsd_islocked genfs_islocked
+#else
int
zfs_netbsd_lock(void *v)
{
@@ -5449,6 +5787,7 @@ zfs_netbsd_islocked(void *v)
return LK_EXCLUSIVE;
}
+#endif
/*
int
Index: src/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_znode.c
diff -u src/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_znode.c:1.12 src/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_znode.c:1.13
--- src/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_znode.c:1.12 Sun Nov 20 02:54:25 2011
+++ src/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_znode.c Mon Oct 15 23:08:19 2012
@@ -618,8 +618,8 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_bu
int error;
zp = kmem_cache_alloc(znode_cache, KM_SLEEP);
- for (;;) {
+ for (;;) {
error = getnewvnode(VT_ZFS, zfsvfs->z_parent->z_vfs,
zfs_vnodeop_p, NULL, &zp->z_vnode);
if (__predict_true(error == 0))
@@ -628,7 +628,7 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_bu
"error=%d\n", error);
(void)kpause("zfsnewvn", false, hz, NULL);
}
-
+
ASSERT(zp->z_dirlocks == NULL);
ASSERT(zp->z_dbuf == NULL);
ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs));
@@ -656,6 +656,7 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_bu
vp->v_vfsp = zfsvfs->z_parent->z_vfs;
vp->v_type = IFTOVT((mode_t)zp->z_phys->zp_mode);
vp->v_data = zp;
+ genfs_node_init(vp, &zfs_genfsops);
switch (vp->v_type) {
case VDIR:
zp->z_zn_prefetch = B_TRUE; /* z_prefetch default is enabled */
@@ -686,6 +687,7 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_bu
zp->z_zfsvfs = zfsvfs;
mutex_exit(&zfsvfs->z_znodes_lock);
+ VFS_HOLD(zfsvfs->z_vfs);
return (zp);
}
@@ -998,11 +1000,6 @@ again:
if (((znode_phys_t *)db->db_data)->zp_gen != 0) {
zp = zfs_znode_alloc(zfsvfs, db, doi.doi_data_block_size);
*zpp = zp;
-
- vp = ZTOV(zp);
- genfs_node_init(vp, &zfs_genfsops);
- VOP_UNLOCK(vp);
-
err = 0;
} else {
dmu_buf_rele(db, NULL);
@@ -1076,17 +1073,13 @@ zfs_znode_delete(znode_t *zp, dmu_tx_t *
zfs_znode_free(zp);
}
-/*
- * zfs_zinactive must be called with ZFS_OBJ_HOLD_ENTER held. And this lock
- * will be released in zfs_zinactive.
- */
void
zfs_zinactive(znode_t *zp)
{
vnode_t *vp = ZTOV(zp);
zfsvfs_t *zfsvfs = zp->z_zfsvfs;
uint64_t z_id = zp->z_id;
-
+
ASSERT(zp->z_dbuf && zp->z_phys);
/*
@@ -1107,7 +1100,7 @@ zfs_zinactive(znode_t *zp)
}
mutex_exit(&zp->z_lock);
- /* XXX why disabled zfs_znode_dmu_fini(zp); */
+ zfs_znode_dmu_fini(zp);
ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id);
zfs_znode_free(zp);
}
@@ -1116,7 +1109,14 @@ void
zfs_znode_free(znode_t *zp)
{
zfsvfs_t *zfsvfs = zp->z_zfsvfs;
- ASSERT(ZTOV(zp) == NULL);
+ struct vnode *vp = ZTOV(zp);
+
+ /* XXX Not all callers are from VOP_RECLAIM. What to do? */
+ KASSERT(vp != NULL);
+ mutex_enter(vp->v_interlock); /* XXX Necessary? */
+ genfs_node_destroy(vp);
+ vp->v_data = NULL;
+ mutex_exit(vp->v_interlock);
dprintf("destroying znode %p\n", zp);
//cpu_Debugger();
@@ -1403,6 +1403,8 @@ top:
dmu_tx_commit(tx);
+ zfs_range_unlock(rl);
+
/*
* Clear any mapped pages in the truncated region. This has to
* happen outside of the transaction to avoid the possibility of