Diff below changes the representation of a NFS mount point.  It is
adapted from NetBSD with some tweaks of mine and is required for
properly locking NFSnodes.

The idea is to keep a reference to the root vnode in 'struct nfsmount'
instead of doing a lookup every time VFS_ROOT() is called.  Calls to
nfs_root() in the diskless path also become useless and we don't have
to play with locking there.  Finally we look at the number of refcounts
when unmounting a filesystem, this is useful when debugging async races
as reported by bluhm@ on bugs@.

Note that nfs_root() already returns a "locked" node, that's why I'm
using vput(9) and not vrele(9).  However in the current context of NFS
these functions are identical since VOP_LOCK/UNLOCK are noop.

I have tested this on diskless & non-diskless, but I'm looking for more
tests 'cause this area is critical.

Comments?  Oks?

Index: nfs/nfs_node.c
===================================================================
RCS file: /cvs/src/sys/nfs/nfs_node.c,v
retrieving revision 1.66
diff -u -p -r1.66 nfs_node.c
--- nfs/nfs_node.c      28 Mar 2018 09:40:26 -0000      1.66
+++ nfs/nfs_node.c      3 Apr 2018 12:22:12 -0000
@@ -138,19 +138,7 @@ loop:
        /* we now have an nfsnode on this vnode */
        vp->v_flag &= ~VLARVAL;
        np->n_vnode = vp;
-
        rw_init(&np->n_commitlock, "nfs_commitlk");
-
-       /* 
-        * Are we getting the root? If so, make sure the vnode flags
-        * are correct 
-        */
-       if ((fhsize == nmp->nm_fhsize) && !bcmp(fh, nmp->nm_fh, fhsize)) {
-               if (vp->v_type == VNON)
-                       vp->v_type = VDIR;
-               vp->v_flag |= VROOT;
-       }
-
        np->n_fhp = &np->n_fh;
        bcopy(fh, np->n_fhp, fhsize);
        np->n_fhsize = fhsize;
Index: nfs/nfs_vfsops.c
===================================================================
RCS file: /cvs/src/sys/nfs/nfs_vfsops.c,v
retrieving revision 1.116
diff -u -p -r1.116 nfs_vfsops.c
--- nfs/nfs_vfsops.c    10 Feb 2018 05:24:23 -0000      1.116
+++ nfs/nfs_vfsops.c    3 Apr 2018 13:14:40 -0000
@@ -71,11 +71,13 @@ extern struct nfsstats nfsstats;
 extern int nfs_ticks;
 extern u_int32_t nfs_procids[NFS_NPROCS];
 
-int            nfs_sysctl(int *, u_int, void *, size_t *, void *, size_t, 
struct proc *);
-int            nfs_checkexp(struct mount *, struct mbuf *, int *, struct ucred 
**);
-struct mount   *nfs_mount_diskless(struct nfs_dlmount *, char *, int);
+int    nfs_sysctl(int *, u_int, void *, size_t *, void *, size_t,
+           struct proc *);
+int    nfs_checkexp(struct mount *, struct mbuf *, int *, struct ucred **);
+struct mount *nfs_mount_diskless(struct nfs_dlmount *, char *, int,
+           struct vnode **, struct proc *p);
 int    mountnfs(struct nfs_args *, struct mount *, struct mbuf *,
-           const char *, char *);
+           const char *, char *, struct vnode **, struct proc *p);
 int    nfs_quotactl(struct mount *, int, uid_t, caddr_t, struct proc *);
 int    nfs_root(struct mount *, struct vnode **);
 int    nfs_start(struct mount *, int, struct proc *);
@@ -123,15 +125,13 @@ nfs_statfs(struct mount *mp, struct stat
        struct nfsmount *nmp = VFSTONFS(mp);
        int error = 0, retattr;
        struct ucred *cred;
-       struct nfsnode *np;
        u_quad_t tquad;
 
        info.nmi_v3 = (nmp->nm_flag & NFSMNT_NFSV3);
 
-       error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np);
+       error = nfs_root(mp, &vp);
        if (error)
                return (error);
-       vp = NFSTOV(np);
        cred = crget();
        cred->cr_ngroups = 0;
        if (info.nmi_v3 && (nmp->nm_flag & NFSMNT_GOTFSINFO) == 0)
@@ -178,7 +178,7 @@ nfs_statfs(struct mount *mp, struct stat
        copy_statfs_info(sbp, mp);
        m_freem(info.nmi_mrep);
 nfsmout: 
-       vrele(vp);
+       vput(vp);
        crfree(cred);
        return (error);
 }
@@ -281,14 +281,14 @@ nfs_mountroot(void)
         */
        if (nfs_boot_getfh(&nfs_diskless.nd_boot, "root", 
&nfs_diskless.nd_root, -1))
                panic("nfs_mountroot: root");
-       mp = nfs_mount_diskless(&nfs_diskless.nd_root, "/", 0);
-       nfs_root(mp, &rootvp);
+       mp = nfs_mount_diskless(&nfs_diskless.nd_root, "/", 0, &vp, procp);
        printf("root on %s\n", nfs_diskless.nd_root.ndm_host);
 
        /*
         * Link it into the mount list.
         */
        TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
+       rootvp = vp;
        vfs_unbusy(mp);
 
        /* Get root attributes (for the time). */
@@ -333,8 +333,8 @@ nfs_mountroot(void)
         */
        error = nfs_boot_getfh(&nfs_diskless.nd_boot, "swap", 
&nfs_diskless.nd_swap, 5);
        if (!error) {
-               mp = nfs_mount_diskless(&nfs_diskless.nd_swap, "/swap", 0);
-               nfs_root(mp, &vp);
+               mp = nfs_mount_diskless(&nfs_diskless.nd_swap, "/swap", 0, &vp,
+                   procp);
                vfs_unbusy(mp);
 
                /*
@@ -376,7 +376,8 @@ nfs_mountroot(void)
  * Internal version of mount system call for diskless setup.
  */
 struct mount *
-nfs_mount_diskless(struct nfs_dlmount *ndmntp, char *mntname, int mntflag)
+nfs_mount_diskless(struct nfs_dlmount *ndmntp, char *mntname, int mntflag,
+    struct vnode **vpp, struct proc *p)
 {
        struct mount *mp;
        struct mbuf *m;
@@ -392,7 +393,7 @@ nfs_mount_diskless(struct nfs_dlmount *n
            (m->m_len = ndmntp->ndm_args.addr->sa_len));
 
        error = mountnfs(&ndmntp->ndm_args, mp, m, mntname,
-           ndmntp->ndm_args.hostname);
+           ndmntp->ndm_args.hostname, vpp, p);
        if (error)
                panic("nfs_mountroot: mount %s failed: %d", mntname, error);
 
@@ -556,6 +557,7 @@ nfs_mount(struct mount *mp, const char *
        int error;
        struct nfs_args *args = data;
        struct mbuf *nam;
+       struct vnode *vp;
        char hst[MNAMELEN];
        size_t len;
        u_char nfh[NFSX_V3FHMAX];
@@ -599,7 +601,7 @@ nfs_mount(struct mount *mp, const char *
        if (error)
                return (error);
        args->fh = nfh;
-       error = mountnfs(args, mp, nam, path, hst);
+       error = mountnfs(args, mp, nam, path, hst, &vp, p);
        return (error);
 }
 
@@ -608,9 +610,12 @@ nfs_mount(struct mount *mp, const char *
  */
 int
 mountnfs(struct nfs_args *argp, struct mount *mp, struct mbuf *nam,
-    const char *pth, char *hst)
+    const char *pth, char *hst, struct vnode **vpp, struct proc *p)
 {
        struct nfsmount *nmp;
+       struct nfsnode *np;
+       struct vnode *vp;
+       struct vattr attr;
        int error;
 
        if (mp->mnt_flag & MNT_UPDATE) {
@@ -633,12 +638,10 @@ mountnfs(struct nfs_args *argp, struct m
        nmp->nm_readdirsize = NFS_READDIRSIZE;
        nmp->nm_numgrps = NFS_MAXGRPS;
        nmp->nm_readahead = NFS_DEFRAHEAD;
-       nmp->nm_fhsize = argp->fhsize;
        nmp->nm_acregmin = NFS_MINATTRTIMO;
        nmp->nm_acregmax = NFS_MAXATTRTIMO;
        nmp->nm_acdirmin = NFS_MINATTRTIMO;
        nmp->nm_acdirmax = NFS_MAXATTRTIMO;
-       bcopy(argp->fh, nmp->nm_fh, argp->fhsize);
        mp->mnt_stat.f_namemax = MAXNAMLEN;
        memset(mp->mnt_stat.f_mntonname, 0, MNAMELEN);
        strlcpy(mp->mnt_stat.f_mntonname, pth, MNAMELEN);
@@ -673,6 +676,30 @@ mountnfs(struct nfs_args *argp, struct m
         * point.
         */
        mp->mnt_stat.f_iosize = NFS_MAXDGRAMDATA;
+       error = nfs_nget(mp, (nfsfh_t *)argp->fh, argp->fhsize, &np);
+       if (error)
+               goto bad;
+       vp = NFSTOV(np);
+       error = VOP_GETATTR(vp, &attr, p->p_ucred, p);
+       if (error) {
+               vput(vp);
+               goto bad;
+       }
+
+       /*
+        * A reference count is needed on the nfsnode representing the
+        * remote root.  If this object is not persistent, then backward
+        * traversals of the mount point (i.e. "..") will not work if
+        * the nfsnode gets flushed out of the cache. Ufs does not have
+        * this problem, because one can identify root inodes by their
+        * number == ROOTINO (2).  So, just unlock, but no rele.
+        */
+       nmp->nm_vnode = vp;
+       if (vp->v_type == VNON)
+               vp->v_type = VDIR;
+       vp->v_flag = VROOT;
+       VOP_UNLOCK(vp, curproc);
+       *vpp = vp;
 
        return (0);
 bad:
@@ -687,18 +714,35 @@ int
 nfs_unmount(struct mount *mp, int mntflags, struct proc *p)
 {
        struct nfsmount *nmp;
-       int error, flags;
+       struct vnode *vp;
+       int error, flags = 0;
 
        nmp = VFSTONFS(mp);
-       flags = 0;
+       error = nfs_root(mp, &vp);
+       if (error)
+               return (error);
+
+       if ((mntflags & MNT_FORCE) == 0 && vp->v_usecount > 2) {
+               vput(vp);
+               return (EBUSY);
+       }
 
        if (mntflags & MNT_FORCE)
                flags |= FORCECLOSE;
 
-       error = vflush(mp, NULL, flags);
-       if (error)
+       error = vflush(mp, vp, flags);
+       if (error) {
+               vput(vp);
                return (error);
+       }
 
+       /*
+        * There are two references count to get rid of here: one
+        * from mountnfs() and one from nfs_root() above.
+        */
+       vrele(vp);
+       vput(vp);
+       vgone(vp);
        nfs_disconnect(nmp);
        m_freem(nmp->nm_nam);
        timeout_del(&nmp->nm_rtimeout);
@@ -713,15 +757,19 @@ nfs_unmount(struct mount *mp, int mntfla
 int
 nfs_root(struct mount *mp, struct vnode **vpp)
 {
+       struct vnode *vp;
        struct nfsmount *nmp;
-       struct nfsnode *np;
        int error;
 
        nmp = VFSTONFS(mp);
-       error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np);
-       if (error)
+       vp = nmp->nm_vnode;
+       vref(vp);
+       error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, curproc);
+       if (error) {
+               vrele(vp);
                return (error);
-       *vpp = NFSTOV(np);
+       }
+       *vpp = vp;
        return (0);
 }
 
Index: nfs/nfsmount.h
===================================================================
RCS file: /cvs/src/sys/nfs/nfsmount.h,v
retrieving revision 1.27
diff -u -p -r1.27 nfsmount.h
--- nfs/nfsmount.h      22 Feb 2017 11:42:46 -0000      1.27
+++ nfs/nfsmount.h      3 Apr 2018 12:22:12 -0000
@@ -49,12 +49,11 @@ struct      nfsmount {
                nm_ntree;               /* filehandle/node tree */
        TAILQ_HEAD(reqs, nfsreq)
                nm_reqsq;               /* request queue for this mount. */
-       struct timeout nm_rtimeout;     /* timeout (scans/resends nm_reqsq). */
-       int     nm_flag;                /* Flags for soft/hard... */
+       struct  timeout nm_rtimeout;    /* timeout (scans/resends nm_reqsq). */
        struct  mount *nm_mountp;       /* Vfs structure for this filesystem */
+       struct  vnode *nm_vnode;        /* vnode of root dir */
+       int     nm_flag;                /* Flags for soft/hard... */
        int     nm_numgrps;             /* Max. size of groupslist */
-       u_char  nm_fh[NFSX_V3FHMAX];    /* File handle of root dir */
-       int     nm_fhsize;              /* Size of root file handle */
        struct  socket *nm_so;          /* Rpc socket */
        int     nm_sotype;              /* Type of socket */
        int     nm_soproto;             /* and protocol */

Reply via email to