Author: kib
Date: Sat May 11 11:17:44 2013
New Revision: 250505
URL: http://svnweb.freebsd.org/changeset/base/250505

Log:
  - Fix nullfs vnode reference leak in nullfs_reclaim_lowervp().  The
    null_hashget() obtains the reference on the nullfs vnode, which must
    be dropped.
  
  - Fix a wart which existed from the introduction of the nullfs
    caching, do not unlock lower vnode in the nullfs_reclaim_lowervp().
    It should be innocent, but now it is also formally safe.  Inform the
    nullfs_reclaim() about this using the NULLV_NOUNLOCK flag set on
    nullfs inode.
  
  - Add a callback to the upper filesystems for the lower vnode
    unlinking. When inactivating a nullfs vnode, check if the lower
    vnode was unlinked, indicated by nullfs flag NULLV_DROP or VV_NOSYNC
    on the lower vnode, and reclaim upper vnode if so.  This allows
    nullfs to purge cached vnodes for the unlinked lower vnode, avoiding
    excessive caching.
  
  Reported by:  G??ran L??wkrantz <goran.lowkra...@ismobile.com>
  Tested by:    pho
  Sponsored by: The FreeBSD Foundation
  MFC after:    2 weeks

Modified:
  head/sys/fs/nullfs/null.h
  head/sys/fs/nullfs/null_subr.c
  head/sys/fs/nullfs/null_vfsops.c
  head/sys/fs/nullfs/null_vnops.c
  head/sys/kern/vfs_subr.c
  head/sys/kern/vfs_syscalls.c
  head/sys/sys/mount.h

Modified: head/sys/fs/nullfs/null.h
==============================================================================
--- head/sys/fs/nullfs/null.h   Sat May 11 10:51:32 2013        (r250504)
+++ head/sys/fs/nullfs/null.h   Sat May 11 11:17:44 2013        (r250505)
@@ -53,8 +53,12 @@ struct null_node {
        LIST_ENTRY(null_node)   null_hash;      /* Hash list */
        struct vnode            *null_lowervp;  /* VREFed once */
        struct vnode            *null_vnode;    /* Back pointer */
+       u_int                   null_flags;
 };
 
+#define        NULLV_NOUNLOCK  0x0001
+#define        NULLV_DROP      0x0002
+
 #define        MOUNTTONULLMOUNT(mp) ((struct null_mount *)((mp)->mnt_data))
 #define        VTONULL(vp) ((struct null_node *)(vp)->v_data)
 #define        NULLTOV(xp) ((xp)->null_vnode)

Modified: head/sys/fs/nullfs/null_subr.c
==============================================================================
--- head/sys/fs/nullfs/null_subr.c      Sat May 11 10:51:32 2013        
(r250504)
+++ head/sys/fs/nullfs/null_subr.c      Sat May 11 11:17:44 2013        
(r250505)
@@ -247,6 +247,7 @@ null_nodeget(mp, lowervp, vpp)
 
        xp->null_vnode = vp;
        xp->null_lowervp = lowervp;
+       xp->null_flags = 0;
        vp->v_type = lowervp->v_type;
        vp->v_data = xp;
        vp->v_vnlock = lowervp->v_vnlock;

Modified: head/sys/fs/nullfs/null_vfsops.c
==============================================================================
--- head/sys/fs/nullfs/null_vfsops.c    Sat May 11 10:51:32 2013        
(r250504)
+++ head/sys/fs/nullfs/null_vfsops.c    Sat May 11 11:17:44 2013        
(r250505)
@@ -65,7 +65,6 @@ static vfs_statfs_t   nullfs_statfs;
 static vfs_unmount_t   nullfs_unmount;
 static vfs_vget_t      nullfs_vget;
 static vfs_extattrctl_t        nullfs_extattrctl;
-static vfs_reclaim_lowervp_t nullfs_reclaim_lowervp;
 
 /*
  * Mount null layer
@@ -391,8 +390,37 @@ nullfs_reclaim_lowervp(struct mount *mp,
        vp = null_hashget(mp, lowervp);
        if (vp == NULL)
                return;
+       VTONULL(vp)->null_flags |= NULLV_NOUNLOCK;
        vgone(vp);
-       vn_lock(lowervp, LK_EXCLUSIVE | LK_RETRY);
+       vput(vp);
+}
+
+static void
+nullfs_unlink_lowervp(struct mount *mp, struct vnode *lowervp)
+{
+       struct vnode *vp;
+       struct null_node *xp;
+
+       vp = null_hashget(mp, lowervp);
+       if (vp == NULL)
+               return;
+       xp = VTONULL(vp);
+       xp->null_flags |= NULLV_DROP | NULLV_NOUNLOCK;
+       vhold(vp);
+       vunref(vp);
+
+       /*
+        * If vunref() dropped the last use reference on the nullfs
+        * vnode, it must be reclaimed, and its lock was split from
+        * the lower vnode lock.  Need to do extra unlock before
+        * allowing the final vdrop() to free the vnode.
+        */
+       if (vp->v_usecount == 0) {
+               KASSERT((vp->v_iflag & VI_DOOMED) != 0,
+                   ("not reclaimed %p", vp));
+               VOP_UNLOCK(vp, 0);
+       }
+       vdrop(vp);
 }
 
 static struct vfsops null_vfsops = {
@@ -408,6 +436,7 @@ static struct vfsops null_vfsops = {
        .vfs_unmount =          nullfs_unmount,
        .vfs_vget =             nullfs_vget,
        .vfs_reclaim_lowervp =  nullfs_reclaim_lowervp,
+       .vfs_unlink_lowervp =   nullfs_unlink_lowervp,
 };
 
 VFS_SET(null_vfsops, nullfs, VFCF_LOOPBACK | VFCF_JAIL);

Modified: head/sys/fs/nullfs/null_vnops.c
==============================================================================
--- head/sys/fs/nullfs/null_vnops.c     Sat May 11 10:51:32 2013        
(r250504)
+++ head/sys/fs/nullfs/null_vnops.c     Sat May 11 11:17:44 2013        
(r250505)
@@ -692,18 +692,24 @@ null_unlock(struct vop_unlock_args *ap)
 static int
 null_inactive(struct vop_inactive_args *ap __unused)
 {
-       struct vnode *vp;
+       struct vnode *vp, *lvp;
+       struct null_node *xp;
        struct mount *mp;
        struct null_mount *xmp;
 
        vp = ap->a_vp;
+       xp = VTONULL(vp);
+       lvp = NULLVPTOLOWERVP(vp);
        mp = vp->v_mount;
        xmp = MOUNTTONULLMOUNT(mp);
-       if ((xmp->nullm_flags & NULLM_CACHE) == 0) {
+       if ((xmp->nullm_flags & NULLM_CACHE) == 0 ||
+           (xp->null_flags & NULLV_DROP) != 0 ||
+           (lvp->v_vflag & VV_NOSYNC) != 0) {
                /*
                 * If this is the last reference and caching of the
-                * nullfs vnodes is not enabled, then free up the
-                * vnode so as not to tie up the lower vnodes.
+                * nullfs vnodes is not enabled, or the lower vnode is
+                * deleted, then free up the vnode so as not to tie up
+                * the lower vnodes.
                 */
                vp->v_object = NULL;
                vrecycle(vp);
@@ -748,7 +754,10 @@ null_reclaim(struct vop_reclaim_args *ap
         */
        if (vp->v_writecount > 0)
                VOP_ADD_WRITECOUNT(lowervp, -1);
-       vput(lowervp);
+       if ((xp->null_flags & NULLV_NOUNLOCK) != 0)
+               vunref(lowervp);
+       else
+               vput(lowervp);
        free(xp, M_NULLFSNODE);
 
        return (0);

Modified: head/sys/kern/vfs_subr.c
==============================================================================
--- head/sys/kern/vfs_subr.c    Sat May 11 10:51:32 2013        (r250504)
+++ head/sys/kern/vfs_subr.c    Sat May 11 11:17:44 2013        (r250505)
@@ -2700,19 +2700,20 @@ vgone(struct vnode *vp)
 }
 
 static void
-vgonel_reclaim_lowervp_vfs(struct mount *mp __unused,
+notify_lowervp_vfs_dummy(struct mount *mp __unused,
     struct vnode *lowervp __unused)
 {
 }
 
 /*
- * Notify upper mounts about reclaimed vnode.
+ * Notify upper mounts about reclaimed or unlinked vnode.
  */
-static void
-vgonel_reclaim_lowervp(struct vnode *vp)
+void
+vfs_notify_upper(struct vnode *vp, int event)
 {
        static struct vfsops vgonel_vfsops = {
-               .vfs_reclaim_lowervp = vgonel_reclaim_lowervp_vfs
+               .vfs_reclaim_lowervp = notify_lowervp_vfs_dummy,
+               .vfs_unlink_lowervp = notify_lowervp_vfs_dummy,
        };
        struct mount *mp, *ump, *mmp;
 
@@ -2736,7 +2737,17 @@ vgonel_reclaim_lowervp(struct vnode *vp)
                }
                TAILQ_INSERT_AFTER(&mp->mnt_uppers, ump, mmp, mnt_upper_link);
                MNT_IUNLOCK(mp);
-               VFS_RECLAIM_LOWERVP(ump, vp);
+               switch (event) {
+               case VFS_NOTIFY_UPPER_RECLAIM:
+                       VFS_RECLAIM_LOWERVP(ump, vp);
+                       break;
+               case VFS_NOTIFY_UPPER_UNLINK:
+                       VFS_UNLINK_LOWERVP(ump, vp);
+                       break;
+               default:
+                       KASSERT(0, ("invalid event %d", event));
+                       break;
+               }
                MNT_ILOCK(mp);
                ump = TAILQ_NEXT(mmp, mnt_upper_link);
                TAILQ_REMOVE(&mp->mnt_uppers, mmp, mnt_upper_link);
@@ -2783,7 +2794,7 @@ vgonel(struct vnode *vp)
        active = vp->v_usecount;
        oweinact = (vp->v_iflag & VI_OWEINACT);
        VI_UNLOCK(vp);
-       vgonel_reclaim_lowervp(vp);
+       vfs_notify_upper(vp, VFS_NOTIFY_UPPER_RECLAIM);
 
        /*
         * Clean out any buffers associated with the vnode.

Modified: head/sys/kern/vfs_syscalls.c
==============================================================================
--- head/sys/kern/vfs_syscalls.c        Sat May 11 10:51:32 2013        
(r250504)
+++ head/sys/kern/vfs_syscalls.c        Sat May 11 11:17:44 2013        
(r250505)
@@ -1846,6 +1846,7 @@ restart:
                if (error)
                        goto out;
 #endif
+               vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK);
                error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
 #ifdef MAC
 out:
@@ -3825,6 +3826,7 @@ restart:
                        return (error);
                goto restart;
        }
+       vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK);
        error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
        vn_finished_write(mp);
 out:

Modified: head/sys/sys/mount.h
==============================================================================
--- head/sys/sys/mount.h        Sat May 11 10:51:32 2013        (r250504)
+++ head/sys/sys/mount.h        Sat May 11 11:17:44 2013        (r250505)
@@ -608,7 +608,7 @@ typedef     int vfs_mount_t(struct mount *mp
 typedef int vfs_sysctl_t(struct mount *mp, fsctlop_t op,
                    struct sysctl_req *req);
 typedef void vfs_susp_clean_t(struct mount *mp);
-typedef void vfs_reclaim_lowervp_t(struct mount *mp, struct vnode *lowervp);
+typedef void vfs_notify_lowervp_t(struct mount *mp, struct vnode *lowervp);
 
 struct vfsops {
        vfs_mount_t             *vfs_mount;
@@ -626,7 +626,8 @@ struct vfsops {
        vfs_extattrctl_t        *vfs_extattrctl;
        vfs_sysctl_t            *vfs_sysctl;
        vfs_susp_clean_t        *vfs_susp_clean;
-       vfs_reclaim_lowervp_t   *vfs_reclaim_lowervp;
+       vfs_notify_lowervp_t    *vfs_reclaim_lowervp;
+       vfs_notify_lowervp_t    *vfs_unlink_lowervp;
 };
 
 vfs_statfs_t   __vfs_statfs;
@@ -747,6 +748,14 @@ vfs_statfs_t       __vfs_statfs;
        }                                                               \
 } while (0)
 
+#define        VFS_UNLINK_LOWERVP(MP, VP) do {                                 
\
+       if (*(MP)->mnt_op->vfs_unlink_lowervp != NULL) {                \
+               VFS_PROLOGUE(MP);                                       \
+               (*(MP)->mnt_op->vfs_unlink_lowervp)((MP), (VP));        \
+               VFS_EPILOGUE(MP);                                       \
+       }                                                               \
+} while (0)
+
 #define VFS_KNOTE_LOCKED(vp, hint) do                                  \
 {                                                                      \
        if (((vp)->v_vflag & VV_NOKNOTE) == 0)                          \
@@ -759,6 +768,9 @@ vfs_statfs_t        __vfs_statfs;
                VN_KNOTE((vp), (hint), 0);                              \
 } while (0)
 
+#define        VFS_NOTIFY_UPPER_RECLAIM        1
+#define        VFS_NOTIFY_UPPER_UNLINK         2
+
 #include <sys/module.h>
 
 /*
@@ -840,6 +852,7 @@ int vfs_modevent(module_t, int, void *);
 void   vfs_mount_error(struct mount *, const char *, ...);
 void   vfs_mountroot(void);                    /* mount our root filesystem */
 void   vfs_mountedfrom(struct mount *, const char *from);
+void   vfs_notify_upper(struct vnode *, int);
 void   vfs_oexport_conv(const struct oexport_args *oexp,
            struct export_args *exp);
 void   vfs_ref(struct mount *);
_______________________________________________
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to