--On Wednesday, 08 May, 2013 12:13 PM +0300 Konstantin Belousov <kostik...@gmail.com> wrote:

On Tue, May 07, 2013 at 08:30:06AM +0200, G??ran L??wkrantz wrote:
I created a PR, kern/178238, on this but would like to know if anyone
has  any ideas or patches?

Have updated the system where I see this to FreeBSD 9.1-STABLE #0
r250229  and still have the problem.

The patch below should fix the issue for you, at least it did so in my
limited testing.

What is does:
1. When inactivating a nullfs vnode, check if the lower vnode is
   unlinked, and reclaim upper vnode if so. [This fixes your case].

2. Besides a callback to the upper filesystems for the lower vnode
   reclaimation, it also calls the upper fs for lower vnode unlink.
   This allows nullfs to purge cached vnodes for the unlinked lower.
   [This fixes an opposite case, when the vnode is removed from the
   lower mount, but upper aliases prevent the vnode from being
   recycled].

3. Fix a wart which existed from the introduction of the nullfs caching,
   do not unlock lower vnode in the nullfs_reclaim_lowervp().  It should
   be completely innocent, but now it is also formally safe.

4. Fix vnode reference leak in nullfs_reclaim_lowervp().

Please note that the patch is basically not tested, I only verified your
scenario and a mirror of it as described in item 2.

diff --git a/sys/fs/nullfs/null.h b/sys/fs/nullfs/null.h
index 4f37020..a624be6 100644
--- a/sys/fs/nullfs/null.h
+++ b/sys/fs/nullfs/null.h
@@ -53,8 +53,11 @@ struct null_node {
        LIST_ENTRY(null_node)   null_hash;      /* Hash list */
        struct vnode            *null_lowervp;  /* VREFed once */
        struct vnode            *null_vnode;    /* Back pointer */
+       u_int                   null_flags;
 };

+#define        NULLV_NOUNLOCK  0x0001
+
 #define        MOUNTTONULLMOUNT(mp) ((struct null_mount *)((mp)->mnt_data))
 #define        VTONULL(vp) ((struct null_node *)(vp)->v_data)
 #define        NULLTOV(xp) ((xp)->null_vnode)
diff --git a/sys/fs/nullfs/null_vfsops.c b/sys/fs/nullfs/null_vfsops.c
index 02932bd..544c358 100644
--- a/sys/fs/nullfs/null_vfsops.c
+++ b/sys/fs/nullfs/null_vfsops.c
@@ -65,7 +65,6 @@ static vfs_statfs_t   nullfs_statfs;
 static vfs_unmount_t   nullfs_unmount;
 static vfs_vget_t      nullfs_vget;
 static vfs_extattrctl_t        nullfs_extattrctl;
-static vfs_reclaim_lowervp_t nullfs_reclaim_lowervp;

 /*
  * Mount null layer
@@ -391,8 +390,22 @@ nullfs_reclaim_lowervp(struct mount *mp, struct
vnode *lowervp)         vp = null_hashget(mp, lowervp);
        if (vp == NULL)
                return;
+       VTONULL(vp)->null_flags |= NULLV_NOUNLOCK;
        vgone(vp);
-       vn_lock(lowervp, LK_EXCLUSIVE | LK_RETRY);
+       vput(vp);
+}
+
+static void
+nullfs_unlink_lowervp(struct mount *mp, struct vnode *lowervp)
+{
+       struct vnode *vp;
+
+       vp = null_hashget(mp, lowervp);
+       if (vp == NULL || vp->v_usecount > 1)
+               return;
+       VTONULL(vp)->null_flags |= NULLV_NOUNLOCK;
+       vgone(vp);
+       vput(vp);
 }

 static struct vfsops null_vfsops = {
@@ -408,6 +421,7 @@ static struct vfsops null_vfsops = {
        .vfs_unmount =          nullfs_unmount,
        .vfs_vget =             nullfs_vget,
        .vfs_reclaim_lowervp =  nullfs_reclaim_lowervp,
+       .vfs_unlink_lowervp =   nullfs_unlink_lowervp,
 };

 VFS_SET(null_vfsops, nullfs, VFCF_LOOPBACK | VFCF_JAIL);
diff --git a/sys/fs/nullfs/null_vnops.c b/sys/fs/nullfs/null_vnops.c
index f59865f..3c41124 100644
--- a/sys/fs/nullfs/null_vnops.c
+++ b/sys/fs/nullfs/null_vnops.c
@@ -692,18 +692,21 @@ null_unlock(struct vop_unlock_args *ap)
 static int
 null_inactive(struct vop_inactive_args *ap __unused)
 {
-       struct vnode *vp;
+       struct vnode *vp, *lvp;
        struct mount *mp;
        struct null_mount *xmp;

        vp = ap->a_vp;
+       lvp = NULLVPTOLOWERVP(vp);
        mp = vp->v_mount;
        xmp = MOUNTTONULLMOUNT(mp);
-       if ((xmp->nullm_flags & NULLM_CACHE) == 0) {
+       if ((xmp->nullm_flags & NULLM_CACHE) == 0 ||
+           (lvp->v_vflag & VV_NOSYNC) != 0) {
                /*
                 * If this is the last reference and caching of the
-                * nullfs vnodes is not enabled, then free up the
-                * vnode so as not to tie up the lower vnodes.
+                * nullfs vnodes is not enabled, or the lower vnode is
+                * deleted, then free up the vnode so as not to tie up
+                * the lower vnodes.
                 */
                vp->v_object = NULL;
                vrecycle(vp);
@@ -748,7 +751,10 @@ null_reclaim(struct vop_reclaim_args *ap)
         */
        if (vp->v_writecount > 0)
                VOP_ADD_WRITECOUNT(lowervp, -1);
-       vput(lowervp);
+       if ((xp->null_flags & NULLV_NOUNLOCK) != 0)
+               vunref(lowervp);
+       else
+               vput(lowervp);
        free(xp, M_NULLFSNODE);

        return (0);
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index de118f7..988a114 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -2700,19 +2700,20 @@ vgone(struct vnode *vp)
 }

 static void
-vgonel_reclaim_lowervp_vfs(struct mount *mp __unused,
+notify_lowervp_vfs_dummy(struct mount *mp __unused,
     struct vnode *lowervp __unused)
 {
 }

 /*
- * Notify upper mounts about reclaimed vnode.
+ * Notify upper mounts about reclaimed or unlinked vnode.
  */
-static void
-vgonel_reclaim_lowervp(struct vnode *vp)
+void
+vfs_notify_upper(struct vnode *vp, int event)
 {
        static struct vfsops vgonel_vfsops = {
-               .vfs_reclaim_lowervp = vgonel_reclaim_lowervp_vfs
+               .vfs_reclaim_lowervp = notify_lowervp_vfs_dummy,
+               .vfs_unlink_lowervp = notify_lowervp_vfs_dummy,
        };
        struct mount *mp, *ump, *mmp;

@@ -2736,7 +2737,17 @@ vgonel_reclaim_lowervp(struct vnode *vp)
                }
                TAILQ_INSERT_AFTER(&mp->mnt_uppers, ump, mmp, mnt_upper_link);
                MNT_IUNLOCK(mp);
-               VFS_RECLAIM_LOWERVP(ump, vp);
+               switch (event) {
+               case VFS_NOTIFY_UPPER_RECLAIM:
+                       VFS_RECLAIM_LOWERVP(ump, vp);
+                       break;
+               case VFS_NOTIFY_UPPER_UNLINK:
+                       VFS_UNLINK_LOWERVP(ump, vp);
+                       break;
+               default:
+                       KASSERT(0, ("invalid event %d", event));
+                       break;
+               }
                MNT_ILOCK(mp);
                ump = TAILQ_NEXT(mmp, mnt_upper_link);
                TAILQ_REMOVE(&mp->mnt_uppers, mmp, mnt_upper_link);
@@ -2783,7 +2794,7 @@ vgonel(struct vnode *vp)
        active = vp->v_usecount;
        oweinact = (vp->v_iflag & VI_OWEINACT);
        VI_UNLOCK(vp);
-       vgonel_reclaim_lowervp(vp);
+       vfs_notify_upper(vp, VFS_NOTIFY_UPPER_RECLAIM);

        /*
         * Clean out any buffers associated with the vnode.
diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c
index 29cb7bd..a004ea0 100644
--- a/sys/kern/vfs_syscalls.c
+++ b/sys/kern/vfs_syscalls.c
@@ -1846,6 +1846,7 @@ restart:
                if (error)
                        goto out;
 #endif
+               vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK);
                error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
 #ifdef MAC
 out:
@@ -3825,6 +3826,7 @@ restart:
                        return (error);
                goto restart;
        }
+       vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK);
        error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
        vn_finished_write(mp);
 out:
diff --git a/sys/sys/mount.h b/sys/sys/mount.h
index a9c86f6..a953dae 100644
--- a/sys/sys/mount.h
+++ b/sys/sys/mount.h
@@ -608,7 +608,7 @@ typedef     int vfs_mount_t(struct mount *mp);
 typedef int vfs_sysctl_t(struct mount *mp, fsctlop_t op,
                    struct sysctl_req *req);
 typedef void vfs_susp_clean_t(struct mount *mp);
-typedef void vfs_reclaim_lowervp_t(struct mount *mp, struct vnode
*lowervp); +typedef void vfs_notify_lowervp_t(struct mount *mp, struct
vnode *lowervp);
 struct vfsops {
        vfs_mount_t             *vfs_mount;
@@ -626,7 +626,8 @@ struct vfsops {
        vfs_extattrctl_t        *vfs_extattrctl;
        vfs_sysctl_t            *vfs_sysctl;
        vfs_susp_clean_t        *vfs_susp_clean;
-       vfs_reclaim_lowervp_t   *vfs_reclaim_lowervp;
+       vfs_notify_lowervp_t    *vfs_reclaim_lowervp;
+       vfs_notify_lowervp_t    *vfs_unlink_lowervp;
 };

 vfs_statfs_t   __vfs_statfs;
@@ -747,6 +748,14 @@ vfs_statfs_t       __vfs_statfs;
        }                                                               \
 } while (0)

+#define        VFS_UNLINK_LOWERVP(MP, VP) do {                                 
\
+       if (*(MP)->mnt_op->vfs_unlink_lowervp != NULL) {          \
+               VFS_PROLOGUE(MP);                                       \
+               (*(MP)->mnt_op->vfs_unlink_lowervp)((MP), (VP));  \
+               VFS_EPILOGUE(MP);                                       \
+       }                                                               \
+} while (0)
+
 #define VFS_KNOTE_LOCKED(vp, hint) do                                  \
 {                                                                      \
        if (((vp)->v_vflag & VV_NOKNOTE) == 0)                           \
@@ -759,6 +768,9 @@ vfs_statfs_t        __vfs_statfs;
                VN_KNOTE((vp), (hint), 0);                              \
 } while (0)

+#define        VFS_NOTIFY_UPPER_RECLAIM        1
+#define        VFS_NOTIFY_UPPER_UNLINK         2
+
 #include <sys/module.h>

 /*
@@ -840,6 +852,7 @@ int vfs_modevent(module_t, int, void *);
 void   vfs_mount_error(struct mount *, const char *, ...);
 void   vfs_mountroot(void);                    /* mount our root filesystem */
 void   vfs_mountedfrom(struct mount *, const char *from);
+void   vfs_notify_upper(struct vnode *, int);
 void   vfs_oexport_conv(const struct oexport_args *oexp,
            struct export_args *exp);
 void   vfs_ref(struct mount *);
I assume this is CURRENT? Tried on STABLE but got this:
cc1: warnings being treated as errors
/usr/src/sys/kern/vfs_subr.c: In function 'vfs_notify_upper':
/usr/src/sys/kern/vfs_subr.c:2801: warning: implicit declaration of function 'VFS_PROLOGUE' /usr/src/sys/kern/vfs_subr.c:2801: warning: nested extern declaration of 'VFS_PROLOGUE' [-Wnested-externs] /usr/src/sys/kern/vfs_subr.c:2801: warning: implicit declaration of function 'VFS_EPILOGUE' /usr/src/sys/kern/vfs_subr.c:2801: warning: nested extern declaration of 'VFS_EPILOGUE' [-Wnested-externs]
*** [vfs_subr.o] Error code 1

/glz



_______________________________________________
freebsd-stable@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-stable
To unsubscribe, send any mail to "freebsd-stable-unsubscr...@freebsd.org"

Reply via email to