[PATCH 09/16] Unionfs: mmap implementation

Josef 'Jeff' Sipek Sun, 17 Jun 2007 12:22:15 -0700

From: Yiannis Pericleous <[EMAIL PROTECTED]>

Signed-off-by: Shaya Potter <[EMAIL PROTECTED]>
Signed-off-by: Erez Zadok <[EMAIL PROTECTED]>
Signed-off-by: Yiannis Pericleous <[EMAIL PROTECTED]>
Signed-off-by: Josef 'Jeff' Sipek <[EMAIL PROTECTED]>
---
 fs/unionfs/Makefile     |    2 +-
 fs/unionfs/commonfops.c |   19 ++-
 fs/unionfs/copyup.c     |    5 +
 fs/unionfs/file.c       |  214 +++++++----------------------
 fs/unionfs/inode.c      |    9 ++
 fs/unionfs/main.c       |    6 -
 fs/unionfs/mmap.c       |  348 +++++++++++++++++++++++++++++++++++++++++++++++
 fs/unionfs/super.c      |    8 +-
 fs/unionfs/union.h      |    1 +
 9 files changed, 438 insertions(+), 174 deletions(-)
 create mode 100644 fs/unionfs/mmap.c


diff --git a/fs/unionfs/Makefile b/fs/unionfs/Makefile
index 6986d79..78be3e7 100644
--- a/fs/unionfs/Makefile
+++ b/fs/unionfs/Makefile
@@ -2,6 +2,6 @@ obj-$(CONFIG_UNION_FS) += unionfs.o
 
 unionfs-y := subr.o dentry.o file.o inode.o main.o super.o \
        rdstate.o copyup.o dirhelper.o rename.o unlink.o \
-       lookup.o commonfops.o dirfops.o sioq.o
+       lookup.o commonfops.o dirfops.o sioq.o mmap.o
 
 unionfs-$(CONFIG_UNION_FS_XATTR) += xattr.o
diff --git a/fs/unionfs/commonfops.c b/fs/unionfs/commonfops.c
index db8c334..0222393 100644
--- a/fs/unionfs/commonfops.c
+++ b/fs/unionfs/commonfops.c
@@ -571,12 +571,25 @@ out_nofree:
 int unionfs_file_release(struct inode *inode, struct file *file)
 {
        struct file *hidden_file = NULL;
-       struct unionfs_file_info *fileinfo = UNIONFS_F(file);
-       struct unionfs_inode_info *inodeinfo = UNIONFS_I(inode);
+       struct unionfs_file_info *fileinfo;
+       struct unionfs_inode_info *inodeinfo;
+       struct super_block *sb = inode->i_sb;
        int bindex, bstart, bend;
        int fgen;
+       int err;
+
+       unionfs_read_lock(sb);
+       /*
+        * Yes, we have to revalidate this file even if it's being released.
+        * This is important for open-but-unlinked files, as well as mmap
+        * support.
+        */
+       if ((err = unionfs_file_revalidate(file, 1)))
+               return err;
+       fileinfo = UNIONFS_F(file);
+       BUG_ON(file->f_dentry->d_inode != inode);
+       inodeinfo = UNIONFS_I(inode);
 
-       unionfs_read_lock(inode->i_sb);
        /* fput all the hidden files */
        fgen = atomic_read(&fileinfo->generation);
        bstart = fbstart(file);
diff --git a/fs/unionfs/copyup.c b/fs/unionfs/copyup.c
index a80ece6..dff4f1c 100644
--- a/fs/unionfs/copyup.c
+++ b/fs/unionfs/copyup.c
@@ -291,8 +291,13 @@ static int __copyup_reg_data(struct dentry *dentry,
 
        kfree(buf);
 
+       if (!err)
+               err = output_file->f_op->fsync(output_file,
+                                              new_hidden_dentry, 0);
+
        if (err)
                goto out_close_out;
+
        if (copyup_file) {
                *copyup_file = output_file;
                goto out_close_in;
diff --git a/fs/unionfs/file.c b/fs/unionfs/file.c
index 2e5ec42..afffe59 100644
--- a/fs/unionfs/file.c
+++ b/fs/unionfs/file.c
@@ -22,219 +22,110 @@
  * File Operations *
  *******************/
 
-static loff_t unionfs_llseek(struct file *file, loff_t offset, int origin)
-{
-       loff_t err;
-       struct file *hidden_file = NULL;
-
-       unionfs_read_lock(file->f_dentry->d_sb);
-       if ((err = unionfs_file_revalidate(file, 0)))
-               goto out;
-
-       hidden_file = unionfs_lower_file(file);
-       /* always set hidden position to this one */
-       hidden_file->f_pos = file->f_pos;
-
-       memcpy(&hidden_file->f_ra, &file->f_ra, sizeof(struct file_ra_state));
-
-       if (hidden_file->f_op && hidden_file->f_op->llseek)
-               err = hidden_file->f_op->llseek(hidden_file, offset, origin);
-       else
-               err = generic_file_llseek(hidden_file, offset, origin);
-
-       if (err < 0)
-               goto out;
-       if (err != file->f_pos) {
-               file->f_pos = err;
-               file->f_version++;
-       }
-out:
-       unionfs_read_unlock(file->f_dentry->d_sb);
-       return err;
-}
-
 static ssize_t unionfs_read(struct file *file, char __user *buf,
                            size_t count, loff_t *ppos)
 {
-       struct file *hidden_file;
-       loff_t pos = *ppos;
        int err;
 
        unionfs_read_lock(file->f_dentry->d_sb);
+
        if ((err = unionfs_file_revalidate(file, 0)))
                goto out;
 
-       err = -EINVAL;
-       hidden_file = unionfs_lower_file(file);
-       if (!hidden_file->f_op || !hidden_file->f_op->read)
-               goto out;
+       err = do_sync_read(file, buf, count, ppos);
 
-       err = hidden_file->f_op->read(hidden_file, buf, count, &pos);
-       *ppos = pos;
+       if (err >= 0)
+               touch_atime(unionfs_lower_mnt(file->f_path.dentry),
+                           unionfs_lower_dentry(file->f_path.dentry));
 
 out:
        unionfs_read_unlock(file->f_dentry->d_sb);
        return err;
 }
 
-static ssize_t unionfs_write(struct file *file, const char __user *buf,
-                            size_t count, loff_t *ppos)
+static ssize_t unionfs_aio_read(struct kiocb *iocb, const struct iovec *iov,
+                               unsigned long nr_segs, loff_t pos)
 {
-       int err;
-       struct file *hidden_file = NULL;
-       struct inode *inode;
-       struct inode *hidden_inode;
-       loff_t pos = *ppos;
-       int bstart, bend;
+       int err = 0;
+       struct file *file = iocb->ki_filp;
 
        unionfs_read_lock(file->f_dentry->d_sb);
-       if ((err = unionfs_file_revalidate(file, 1)))
-               goto out;
-
-       inode = file->f_dentry->d_inode;
-
-       bstart = fbstart(file);
-       bend = fbend(file);
 
-       BUG_ON(bstart == -1);
-
-       hidden_file = unionfs_lower_file(file);
-       hidden_inode = hidden_file->f_dentry->d_inode;
-
-       if (!hidden_file->f_op || !hidden_file->f_op->write) {
-               err = -EINVAL;
+       if ((err = unionfs_file_revalidate(file, 0)))
                goto out;
-       }
 
-       /* adjust for append -- seek to the end of the file */
-       if (file->f_flags & O_APPEND)
-               pos = inode->i_size;
+       err = generic_file_aio_read(iocb, iov, nr_segs, pos);
 
-       err = hidden_file->f_op->write(hidden_file, buf, count, &pos);
+       if (err == -EIOCBQUEUED)
+               err = wait_on_sync_kiocb(iocb);
 
-       /*
-        * copy ctime and mtime from lower layer attributes
-        * atime is unchanged for both layers
-        */
        if (err >= 0)
-               fsstack_copy_attr_times(inode, hidden_inode);
-
-       *ppos = pos;
+               touch_atime(unionfs_lower_mnt(file->f_path.dentry),
+                           unionfs_lower_dentry(file->f_path.dentry));
 
-       /* update this inode's size */
-       if (pos > inode->i_size)
-               inode->i_size = pos;
 out:
        unionfs_read_unlock(file->f_dentry->d_sb);
        return err;
 }
-
-static int unionfs_file_readdir(struct file *file, void *dirent,
-                               filldir_t filldir)
-{
-       return -ENOTDIR;
-}
-
-static unsigned int unionfs_poll(struct file *file, poll_table *wait)
+static ssize_t unionfs_write(struct file * file, const char __user * buf,
+                            size_t count, loff_t *ppos)
 {
-       unsigned int mask = DEFAULT_POLLMASK;
-       struct file *hidden_file = NULL;
+       int err = 0;
 
        unionfs_read_lock(file->f_dentry->d_sb);
-       if (unionfs_file_revalidate(file, 0)) {
-               /* We should pretend an error happened. */
-               mask = POLLERR | POLLIN | POLLOUT;
-               goto out;
-       }
-
-       hidden_file = unionfs_lower_file(file);
 
-       if (!hidden_file->f_op || !hidden_file->f_op->poll)
+       if ((err = unionfs_file_revalidate(file, 1)))
                goto out;
 
-       mask = hidden_file->f_op->poll(hidden_file, wait);
+       err = do_sync_write(file, buf, count, ppos);
 
 out:
        unionfs_read_unlock(file->f_dentry->d_sb);
-       return mask;
+       return err;
 }
 
-static int __do_mmap(struct file *file, struct vm_area_struct *vma)
+static int unionfs_file_readdir(struct file *file, void *dirent,
+                               filldir_t filldir)
 {
-       int err;
-       struct file *hidden_file;
-
-       hidden_file = unionfs_lower_file(file);
-
-       err = -ENODEV;
-       if (!hidden_file->f_op || !hidden_file->f_op->mmap)
-               goto out;
-
-       vma->vm_file = hidden_file;
-       err = hidden_file->f_op->mmap(hidden_file, vma);
-       get_file(hidden_file);  /* make sure it doesn't get freed on us */
-       fput(file);             /* no need to keep extra ref on ours */
-out:
-       return err;
+       return -ENOTDIR;
 }
 
 static int unionfs_mmap(struct file *file, struct vm_area_struct *vma)
 {
        int err = 0;
        int willwrite;
+       struct file *lower_file;
 
        unionfs_read_lock(file->f_dentry->d_sb);
-       /* This might could be deferred to mmap's writepage. */
-       willwrite = ((vma->vm_flags | VM_SHARED | VM_WRITE) == vma->vm_flags);
-       if ((err = unionfs_file_revalidate(file, willwrite)))
-               goto out;
-
-       err = __do_mmap(file, vma);
-
-out:
-       unionfs_read_unlock(file->f_dentry->d_sb);
-       return err;
-}
-
-static int unionfs_fsync(struct file *file, struct dentry *dentry,
-                        int datasync)
-{
-       int err;
-       struct file *hidden_file = NULL;
 
-       unionfs_read_lock(file->f_dentry->d_sb);
        if ((err = unionfs_file_revalidate(file, 1)))
                goto out;
 
-       hidden_file = unionfs_lower_file(file);
-
-       err = -EINVAL;
-       if (!hidden_file->f_op || !hidden_file->f_op->fsync)
-               goto out;
-
-       mutex_lock(&hidden_file->f_dentry->d_inode->i_mutex);
-       err = hidden_file->f_op->fsync(hidden_file, hidden_file->f_dentry,
-                                      datasync);
-       mutex_unlock(&hidden_file->f_dentry->d_inode->i_mutex);
-
-out:
-       unionfs_read_unlock(file->f_dentry->d_sb);
-       return err;
-}
-
-static int unionfs_fasync(int fd, struct file *file, int flag)
-{
-       int err = 0;
-       struct file *hidden_file = NULL;
-
-       unionfs_read_lock(file->f_dentry->d_sb);
-       if ((err = unionfs_file_revalidate(file, 1)))
+       /* This might be deferred to mmap's writepage */
+       willwrite = ((vma->vm_flags | VM_SHARED | VM_WRITE) == vma->vm_flags);
+       if ((err = unionfs_file_revalidate(file, willwrite)))
                goto out;
 
-       hidden_file = unionfs_lower_file(file);
-
-       if (hidden_file->f_op && hidden_file->f_op->fasync)
-               err = hidden_file->f_op->fasync(fd, hidden_file, flag);
+       /*
+        * File systems which do not implement ->writepage may use
+        * generic_file_readonly_mmap as their ->mmap op.  If you call
+        * generic_file_readonly_mmap with VM_WRITE, you'd get an -EINVAL.
+        * But we cannot call the lower ->mmap op, so we can't tell that
+        * writeable mappings won't work.  Therefore, our only choice is to
+        * check if the lower file system supports the ->writepage, and if
+        * not, return EINVAL (the same error that
+        * generic_file_readonly_mmap returns in that case).
+        */
+       lower_file = unionfs_lower_file(file);
+       if (willwrite && !lower_file->f_mapping->a_ops->writepage) {
+               err = -EINVAL;
+               printk("unionfs: branch %d file system does not support "
+                      "writeable mmap\n", fbstart(file));
+       } else {
+               err = generic_file_mmap(file, vma);
+               if (err)
+                       printk("unionfs: generic_file_mmap failed %d\n", err);
+       }
 
 out:
        unionfs_read_unlock(file->f_dentry->d_sb);
@@ -242,16 +133,17 @@ out:
 }
 
 struct file_operations unionfs_main_fops = {
-       .llseek         = unionfs_llseek,
+       .llseek         = generic_file_llseek,
        .read           = unionfs_read,
+       .aio_read       = unionfs_aio_read,
        .write          = unionfs_write,
+       .aio_write      = generic_file_aio_write,
        .readdir        = unionfs_file_readdir,
-       .poll           = unionfs_poll,
        .unlocked_ioctl = unionfs_ioctl,
        .mmap           = unionfs_mmap,
        .open           = unionfs_open,
        .flush          = unionfs_flush,
        .release        = unionfs_file_release,
-       .fsync          = unionfs_fsync,
-       .fasync         = unionfs_fasync,
+       .fsync          = file_fsync,
+       .sendfile       = generic_file_sendfile,
 };
diff --git a/fs/unionfs/inode.c b/fs/unionfs/inode.c
index 627c2a7..9f1acc4 100644
--- a/fs/unionfs/inode.c
+++ b/fs/unionfs/inode.c
@@ -1018,6 +1018,15 @@ static int unionfs_setattr(struct dentry *dentry, struct iattr *ia)
                break;
        }
 
+       /* for mmap */
+       if (ia->ia_valid & ATTR_SIZE) {
+               if (ia->ia_size != i_size_read(inode)) {
+                       err = vmtruncate(inode, ia->ia_size);
+                       if (err)
+                               printk("unionfs_setattr: vmtruncate failed\n");
+               }
+       }
+
        /* get the size from the first hidden inode */
        hidden_inode = unionfs_lower_inode(dentry->d_inode);
        fsstack_copy_attr_all(inode, hidden_inode, unionfs_get_nlinks);
diff --git a/fs/unionfs/main.c b/fs/unionfs/main.c
index a9ad445..2bcc84c 100644
--- a/fs/unionfs/main.c
+++ b/fs/unionfs/main.c
@@ -121,12 +121,6 @@ int unionfs_interpose(struct dentry *dentry, struct super_block *sb, int flag)
            S_ISFIFO(hidden_inode->i_mode) || S_ISSOCK(hidden_inode->i_mode))
                init_special_inode(inode, hidden_inode->i_mode,
                                   hidden_inode->i_rdev);
-       /*
-        * Fix our inode's address operations to that of the lower inode
-        * (Unionfs is FiST-Lite)
-        */
-       if (inode->i_mapping->a_ops != hidden_inode->i_mapping->a_ops)
-               inode->i_mapping->a_ops = hidden_inode->i_mapping->a_ops;
 
        /* all well, copy inode attributes */
        fsstack_copy_attr_all(inode, hidden_inode, unionfs_get_nlinks);
diff --git a/fs/unionfs/mmap.c b/fs/unionfs/mmap.c
new file mode 100644
index 0000000..997b619
--- /dev/null
+++ b/fs/unionfs/mmap.c
@@ -0,0 +1,348 @@
+/*
+ * Copyright (c) 2003-2007 Erez Zadok
+ * Copyright (c) 2003-2006 Charles P. Wright
+ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
+ * Copyright (c) 2005-2006 Junjiro Okajima
+ * Copyright (c) 2006      Shaya Potter
+ * Copyright (c) 2005      Arun M. Krishnakumar
+ * Copyright (c) 2004-2006 David P. Quigley
+ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
+ * Copyright (c) 2003      Puja Gupta
+ * Copyright (c) 2003      Harikesavan Krishnan
+ * Copyright (c) 2003-2007 Stony Brook University
+ * Copyright (c) 2003-2007 The Research Foundation of State University of New York
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "union.h"
+
+/*
+ * Unionfs doesn't implement ->writepages, which is OK with the VFS and
+ * keeps our code simpler and smaller.  Nevertheless, somehow, our own
+ * ->writepage must be called so we can sync the upper pages with the lower
+ * pages: otherwise data changed at the upper layer won't get written to the
+ * lower layer.
+ *
+ * Some lower file systems (e.g., NFS) expect the VFS to call its writepages
+ * only, which in turn will call generic_writepages and invoke each of the
+ * lower file system's ->writepage.  NFS in particular uses the
+ * wbc->fs_private field in its nfs_writepage, which is set in its
+ * nfs_writepages.  So if we don't call the lower nfs_writepages first, then
+ * NFS's nfs_writepage will dereference a NULL wbc->fs_private and cause an
+ * OOPS.  If, however, we implement a unionfs_writepages and then we do call
+ * the lower nfs_writepages, then we "lose control" over the pages we're
+ * trying to write to the lower file system: we won't be writing our own
+ * new/modified data from the upper pages to the lower pages, and any
+ * mmap-based changes are lost.
+ *
+ * This is a fundamental cache-coherency problem in Linux.  The kernel isn't
+ * able to support such stacking abstractions cleanly.  One possible clean
+ * way would be that a lower file system's ->writepage method have some sort
+ * of a callback to validate if any upper pages for the same file+offset
+ * exist and have newer content in them.
+ *
+ * This whole NULL ptr dereference is triggered at the lower file system
+ * (NFS) because the wbc->for_writepages is set to 1.  Therefore, to avoid
+ * this NULL pointer dereference, we set this flag to 0 and restore it upon
+ * exit.  This probably means that we're slightly less efficient in writing
+ * pages out, doing them one at a time, but at least we avoid the oops until
+ * such day as Linux can better support address_space_ops in a stackable
+ * fashion.
+ */
+int unionfs_writepage(struct page *page, struct writeback_control *wbc)
+{
+       int err = -EIO;         /* returned if the lower page can't be had */
+       struct inode *inode;
+       struct inode *lower_inode;
+       struct page *lower_page;
+       char *kaddr, *lower_kaddr;
+       int saved_for_writepages = wbc->for_writepages;
+
+       inode = page->mapping->host;
+       lower_inode = unionfs_lower_inode(inode);
+
+       /* find lower page (returns a locked page, with its refcnt raised) */
+       lower_page = grab_cache_page(lower_inode->i_mapping, page->index);
+       if (!lower_page)
+               goto out;
+
+       /* map both pages and copy our page's data into the lower page */
+       kaddr = kmap(page);
+       lower_kaddr = kmap(lower_page);
+
+       memcpy(lower_kaddr, kaddr, PAGE_CACHE_SIZE);
+
+       kunmap(page);
+       kunmap(lower_page);
+
+       BUG_ON(!lower_inode->i_mapping->a_ops->writepage);
+
+       /* workaround for some lower file systems: see the big comment above */
+       if (wbc->for_writepages && !wbc->fs_private)
+               wbc->for_writepages = 0;
+
+       /* call lower writepage (expects locked page) */
+       err = lower_inode->i_mapping->a_ops->writepage(lower_page, wbc);
+       wbc->for_writepages = saved_for_writepages; /* restore caller's value */
+
+       /*
+        * update mtime and ctime of lower level file system
+        * unionfs' mtime and ctime are updated by generic_file_write
+        */
+       lower_inode->i_mtime = lower_inode->i_ctime = CURRENT_TIME;
+
+       page_cache_release(lower_page); /* b/c grab_cache_page increased refcnt */
+
+       if (err)
+               ClearPageUptodate(page);
+       else
+               SetPageUptodate(page);
+
+out:
+       unlock_page(page);      /* our page is unlocked on every path */
+       return err;
+}
+
+/*
+ * Fill our (upper) page with the contents of the lower file's page at the
+ * same index.  Called by unionfs_readpage, which holds the superblock read
+ * lock, has revalidated @file, and unlocks @page afterwards.
+ *
+ * The lower page is read through the lower ->readpage and copied into @page.
+ * Returns 0 and marks @page up to date on success; returns a negative errno
+ * and clears the up-to-date flag otherwise (-ENOENT if @file has no unionfs
+ * private data: @page's flags are left untouched in that case).
+ */
+static int unionfs_do_readpage(struct file *file, struct page *page)
+{
+       int err = -EIO;
+       struct dentry *dentry;
+       struct file *lower_file = NULL;
+       struct inode *inode, *lower_inode;
+       char *page_data, *lower_page_data;
+       struct page *lower_page;
+
+       dentry = file->f_dentry;
+       if (UNIONFS_F(file) == NULL) {
+               err = -ENOENT;
+               goto out_err;
+       }
+
+       lower_file = unionfs_lower_file(file);
+       inode = dentry->d_inode;
+       lower_inode = unionfs_lower_inode(inode);
+
+       /* read the lower page; on success we hold a reference to it */
+       lower_page = read_cache_page(lower_inode->i_mapping,
+                                    page->index,
+                                    (filler_t *) lower_inode->i_mapping->
+                                    a_ops->readpage, (void *)lower_file);
+
+       if (IS_ERR(lower_page)) {
+               err = PTR_ERR(lower_page);
+               lower_page = NULL;
+               goto out_release;
+       }
+
+       /*
+        * wait for the page data to show up
+        * (signaled by readpage as unlocking the page)
+        */
+       wait_on_page_locked(lower_page);
+       if (!PageUptodate(lower_page)) {
+               /*
+                * call readpage() again if we returned from wait_on_page
+                * with a page that's not up-to-date; that can happen when a
+                * partial page has a few buffers which are ok, but not the
+                * whole page.
+                */
+               lock_page(lower_page);
+               err = lower_inode->i_mapping->a_ops->readpage(lower_file,
+                                                             lower_page);
+               if (err) {
+                       /* keep lower_page: out_release must drop our ref */
+                       goto out_release;
+               }
+
+               wait_on_page_locked(lower_page);
+               if (!PageUptodate(lower_page)) {
+                       err = -EIO;
+                       goto out_release;
+               }
+       }
+
+       /* map pages, get their addresses */
+       page_data = (char *)kmap(page);
+       lower_page_data = (char *)kmap(lower_page);
+
+       memcpy(page_data, lower_page_data, PAGE_CACHE_SIZE);
+
+       err = 0;
+
+       kunmap(lower_page);
+       kunmap(page);
+
+out_release:
+       if (lower_page)
+               page_cache_release(lower_page); /* undo read_cache_page */
+
+       if (err == 0)
+               SetPageUptodate(page);
+       else
+               ClearPageUptodate(page);
+
+out_err:
+       return err;
+}
+
+int unionfs_readpage(struct file *file, struct page *page)
+{
+       struct super_block *sb = file->f_dentry->d_sb;
+       int ret;
+
+       unionfs_read_lock(sb);
+
+       ret = unionfs_file_revalidate(file, 0);
+       if (ret)
+               goto unlock;
+
+       ret = unionfs_do_readpage(file, page);
+       if (ret == 0) {
+               struct dentry *dentry = file->f_path.dentry;
+
+               /* a successful read counts as an access of the lower file */
+               touch_atime(unionfs_lower_mnt(dentry),
+                           unionfs_lower_dentry(dentry));
+       }
+
+unlock:
+       /* we may have been handed a locked page: unlock it, error or not */
+       unlock_page(page);
+       unionfs_read_unlock(sb);
+       return ret;
+}
+
+int unionfs_prepare_write(struct file *file, struct page *page, unsigned from,
+                         unsigned to)
+{
+       struct super_block *sb = file->f_dentry->d_sb;
+       int ret;
+
+       /* nothing to prepare beyond checking the file is still valid */
+       unionfs_read_lock(sb);
+       ret = unionfs_file_revalidate(file, 1);
+       unionfs_read_unlock(sb);
+
+       return ret;
+}
+
+/*
+ * Write bytes [@from, @to) of @page to the lower file with vfs_write(), then
+ * update our inode's i_blocks and i_size.  Returns the revalidation error if
+ * @file cannot be revalidated, otherwise vfs_write()'s result.
+ */
+int unionfs_commit_write(struct file *file, struct page *page, unsigned from,
+                        unsigned to)
+{
+       int err = -ENOMEM;
+       struct inode *inode, *lower_inode;
+       struct file *lower_file = NULL;
+       loff_t pos;
+       unsigned bytes = to - from;
+       char *page_data = NULL;
+       mm_segment_t old_fs;
+
+       BUG_ON(file == NULL);
+
+       unionfs_read_lock(file->f_dentry->d_sb);
+       if ((err = unionfs_file_revalidate(file, 1)))
+               goto out;
+
+       inode = page->mapping->host;
+       lower_inode = unionfs_lower_inode(inode);
+
+       if (UNIONFS_F(file) != NULL)
+               lower_file = unionfs_lower_file(file);
+
+       /* FIXME: is this assertion right here? */
+       BUG_ON(lower_file == NULL);
+
+       page_data = (char *)kmap(page);
+       /* cast before shifting (as for pos below) so big offsets don't wrap */
+       lower_file->f_pos = ((loff_t) page->index << PAGE_CACHE_SHIFT) + from;
+
+       /* SP: I use vfs_write instead of copying page data and the
+        * prepare_write/commit_write combo because file systems like
+        * GFS/OCFS2 don't like things touching those directly,
+        * calling the underlying write op, while a little bit slower, will
+        * call all the FS specific code as well
+        */
+       old_fs = get_fs();
+       set_fs(KERNEL_DS);
+       err = vfs_write(lower_file, page_data + from, bytes,
+                       &lower_file->f_pos);
+       set_fs(old_fs);
+       kunmap(page);
+
+       if (err < 0)
+               goto out;
+
+       inode->i_blocks = lower_inode->i_blocks;
+       /* we may have to update i_size */
+       pos = ((loff_t) page->index << PAGE_CACHE_SHIFT) + to;
+       if (pos > i_size_read(inode))
+               i_size_write(inode, pos);
+
+       /* lower inode times; unionfs' own are set by generic_file_write */
+       lower_inode->i_mtime = lower_inode->i_ctime = CURRENT_TIME;
+       mark_inode_dirty_sync(inode);
+
+out:
+       if (err < 0)
+               ClearPageUptodate(page);
+
+       unionfs_read_unlock(file->f_dentry->d_sb);
+       return err;             /* vfs_write's byte count on success */
+}
+
+/* Forward ->sync_page to the lower file system's page at the same index. */
+void unionfs_sync_page(struct page *page)
+{
+       struct inode *inode, *lower_inode;
+       struct page *lower_page;
+       struct address_space *mapping;
+
+       inode = page->mapping->host;
+       lower_inode = unionfs_lower_inode(inode);
+
+       /*
+        * Only look up an existing lower page (returned locked, with a ref).
+        * grab_cache_page would allocate and insert a new page if none is
+        * cached, but a page that is not cached has no I/O to push along.
+        */
+       lower_page = find_lock_page(lower_inode->i_mapping, page->index);
+       if (!lower_page)
+               return;
+
+       mapping = lower_page->mapping;
+       /*
+        * XXX: can we optimize ala RAIF and set the lower page to be
+        * discarded after a successful sync_page?
+        */
+       if (mapping && mapping->a_ops && mapping->a_ops->sync_page)
+               mapping->a_ops->sync_page(lower_page);
+
+       unlock_page(lower_page);        /* b/c find_lock_page locked it */
+       page_cache_release(lower_page); /* b/c find_lock_page took a ref */
+}
+
+struct address_space_operations unionfs_aops = {
+       .readpage       = unionfs_readpage,
+       .writepage      = unionfs_writepage,
+       .sync_page      = unionfs_sync_page,
+       .prepare_write  = unionfs_prepare_write,
+       .commit_write   = unionfs_commit_write,
+};
diff --git a/fs/unionfs/super.c b/fs/unionfs/super.c
index a7ff06c..196ff12 100644
--- a/fs/unionfs/super.c
+++ b/fs/unionfs/super.c
@@ -26,7 +26,7 @@ static struct kmem_cache *unionfs_inode_cachep;
 
 static void unionfs_read_inode(struct inode *inode)
 {
-       static struct address_space_operations unionfs_empty_aops;
+       extern struct address_space_operations unionfs_aops;
        int size;
        struct unionfs_inode_info *info = UNIONFS_I(inode);
 
@@ -58,8 +58,7 @@ static void unionfs_read_inode(struct inode *inode)
        inode->i_op = &unionfs_main_iops;
        inode->i_fop = &unionfs_main_fops;
 
-       /* I don't think ->a_ops is ever allowed to be NULL */
-       inode->i_mapping->a_ops = &unionfs_empty_aops;
+       inode->i_mapping->a_ops = &unionfs_aops;
 }
 
 /*
@@ -73,6 +72,9 @@ static void unionfs_delete_inode(struct inode *inode)
 {
        inode->i_size = 0;      /* every f/s seems to do that */
 
+       if (inode->i_data.nrpages)
+               truncate_inode_pages(&inode->i_data, 0);
+
        clear_inode(inode);
 }
 
diff --git a/fs/unionfs/union.h b/fs/unionfs/union.h
index 01e29f3..480b8ee 100644
--- a/fs/unionfs/union.h
+++ b/fs/unionfs/union.h
@@ -38,6 +38,7 @@
 #include <linux/string.h>
 #include <linux/vmalloc.h>
 #include <linux/writeback.h>
+#include <linux/buffer_head.h>
 #include <linux/xattr.h>
 #include <linux/fs_stack.h>
 #include <linux/magic.h>
-- 
1.5.2.rc1.165.gaf9b

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 09/16] Unionfs: mmap implementation

Reply via email to