commit 2f8cbae99444b83f13535487c0a22ee68df405ae
Merge: abac97e... bd6a653...
Author: Yiannis Pericleous <[EMAIL PROTECTED]>
Date:   Fri May 25 16:26:19 2007 -0400

    Merge branch 'master' of story:/home/git/unionfs-odf into odf

diff --git a/fs/unionfs/lookup.c b/fs/unionfs/lookup.c
index 
b64584eff2a8b0ee7716437244758d7dc4850087..cf92d6450da3eab1e1aa3125789953e62968884d
 100644
--- a/fs/unionfs/lookup.c
+++ b/fs/unionfs/lookup.c
@@ -406,31 +406,28 @@ void free_dentry_private_data(struct uni
        kmem_cache_free(unionfs_dentry_cachep, udi);
 }
 
-/* allocate new dentry private data, free old one if necessary */
+/*
+ * Allocate new dentry private data, free old one if necessary.
+ * On success, returns a dentry whose ->info node is locked already.
+ */
 int new_dentry_private_data(struct dentry *dentry)
 {
        int size;
        struct unionfs_dentry_info *info = UNIONFS_D(dentry);
        void *p;
-       int unlock_on_err = 0;
 
-       spin_lock(&dentry->d_lock);
        if (!info) {
-               dentry->d_fsdata = kmem_cache_alloc(unionfs_dentry_cachep,
-                                                   GFP_ATOMIC);
-               info = UNIONFS_D(dentry);
+               info = kmem_cache_alloc(unionfs_dentry_cachep, GFP_ATOMIC);
                if (!info)
                        goto out;
 
                mutex_init(&info->lock);
-               unionfs_lock_dentry(dentry);
-               unlock_on_err = 1;
-
                info->lower_paths = NULL;
        }
 
-       info->bstart = -1;
-       info->bend = -1;
+       mutex_lock(&info->lock);
+
+       info->bstart = info->bend = -1;
        info->bcount = sbmax(dentry->d_sb);
        info->odf_info = NULL;
        atomic_set(&info->generation,
@@ -445,18 +442,17 @@ int new_dentry_private_data(struct dentr
        info->lower_paths = p;
        memset(info->lower_paths, 0, size);
 
-       spin_unlock(&dentry->d_lock);
+       /* ok, set the dentry private data pointer */
+       dentry->d_fsdata = info;
        return 0;
 
 out_free:
        kfree(info->lower_paths);
-       if (unlock_on_err)
-               unionfs_unlock_dentry(dentry);
+       mutex_unlock(&info->lock);
 
 out:
        free_dentry_private_data(info);
        dentry->d_fsdata = NULL;
-       spin_unlock(&dentry->d_lock);
        return -ENOMEM;
 }
 
diff --git a/fs/unionfs/mmap.c b/fs/unionfs/mmap.c
index 
105cc2049efb7cd751cc99e5a638038676376357..c79591577a47192b57463ee9a474de46bd737dd2
 100644
--- a/fs/unionfs/mmap.c
+++ b/fs/unionfs/mmap.c
@@ -19,6 +19,39 @@
 
 #include "union.h"
 
+/*
+ * Unionfs doesn't implement ->writepages, which is OK with the VFS and
+ * nkeeps our code simpler and smaller.  Nevertheless, somehow, our own
+ * ->writepage must be called so we can sync the upper pages with the lower
+ * pages: otherwise data changed at the upper layer won't get written to the
+ * lower layer.
+ *
+ * Some lower file systems (e.g., NFS) expect the VFS to call its writepages
+ * only, which in turn will call generic_writepages and invoke each of the
+ * lower file system's ->writepage.  NFS in particular uses the
+ * wbc->fs_private field in its nfs_writepage, which is set in its
+ * nfs_writepages.  So if we don't call the lower nfs_writepages first, then
+ * NFS's nfs_writepage will dereference a NULL wbc->fs_private and cause an
+ * OOPS.  If, however, we implement a unionfs_writepages and then we do call
+ * the lower nfs_writepages, then we "lose control" over the pages we're
+ * trying to write to the lower file system: we won't be writing our own
+ * new/modified data from the upper pages to the lower pages, and any
+ * mmap-based changes are lost.
+ *
+ * This is a fundamental cache-coherency problem in Linux.  The kernel isn't
+ * able to support such stacking abstractions cleanly.  One possible clean
+ * way would be that a lower file system's ->writepage method have some sort
+ * of a callback to validate if any upper pages for the same file+offset
+ * exist and have newer content in them.
+ *
+ * This whole NULL ptr dereference is triggered at the lower file system
+ * (NFS) because the wbc->for_writepages is set to 1.  Therefore, to avoid
+ * this NULL pointer dereference, we set this flag to 0 and restore it upon
+ * exit.  This probably means that we're slightly less efficient in writing
+ * pages out, doing them one at a time, but at least we avoid the oops until
+ * such day as Linux can better support address_space_ops in a stackable
+ * fashion.
+ */
 int unionfs_writepage(struct page *page, struct writeback_control *wbc)
 {
        int err = -EIO;
@@ -26,7 +59,7 @@ int unionfs_writepage(struct page *page,
        struct inode *lower_inode;
        struct page *lower_page;
        char *kaddr, *lower_kaddr;
-       struct writeback_control lower_wbc;
+       int saved_for_writepages = wbc->for_writepages;
 
        inode = page->mapping->host;
        lower_inode = unionfs_lower_inode(inode);
@@ -46,20 +79,14 @@ int unionfs_writepage(struct page *page,
        kunmap(lower_page);
 
        BUG_ON(!lower_inode->i_mapping->a_ops->writepage);
-       memcpy(&lower_wbc, wbc, sizeof(struct writeback_control));
-       /*
-        * This condition should never occur, but if it does, it causes NFS
-        * (if used a s lower branch) to deference wbc->fs_private,
-        * resulting in a NULL deref oops.
-        * XXX: Maybe it's an NFS/VFS bug?
-        */
-       if (lower_wbc.for_writepages && !lower_wbc.fs_private) {
-               printk("unionfs: setting wbc.for_writepages to 0\n");
-               lower_wbc.for_writepages = 0;
-       }
+
+       /* workaround for some lower file systems: see big comment on top */
+       if (wbc->for_writepages && !wbc->fs_private)
+               wbc->for_writepages = 0;
 
        /* call lower writepage (expects locked page) */
-       err = lower_inode->i_mapping->a_ops->writepage(lower_page, &lower_wbc);
+       err = lower_inode->i_mapping->a_ops->writepage(lower_page, wbc);
+       wbc->for_writepages = saved_for_writepages; /* restore value */
 
        /*
         * update mtime and ctime of lower level file system
@@ -76,7 +103,6 @@ int unionfs_writepage(struct page *page,
 
 out:
        unlock_page(page);
-
        return err;
 }
 
_______________________________________________
unionfs-cvs mailing list: http://unionfs.filesystems.org/
[email protected]
http://www.fsl.cs.sunysb.edu/mailman/listinfo/unionfs-cvs

Reply via email to