Includes read_inode, delete_inode, put_super, statfs, remount_fs (which
supports branch-management ops), clear_inode, alloc_inode, destroy_inode,
write_inode, umount_begin, and show_options.

Signed-off-by: Erez Zadok <[EMAIL PROTECTED]>
---
 fs/unionfs/super.c | 1020 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 1020 insertions(+), 0 deletions(-)
 create mode 100644 fs/unionfs/super.c

diff --git a/fs/unionfs/super.c b/fs/unionfs/super.c
new file mode 100644
index 0000000..d9cf2a7
--- /dev/null
+++ b/fs/unionfs/super.c
@@ -0,0 +1,1020 @@
+/*
+ * Copyright (c) 2003-2007 Erez Zadok
+ * Copyright (c) 2003-2006 Charles P. Wright
+ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
+ * Copyright (c) 2005-2006 Junjiro Okajima
+ * Copyright (c) 2005      Arun M. Krishnakumar
+ * Copyright (c) 2004-2006 David P. Quigley
+ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
+ * Copyright (c) 2003      Puja Gupta
+ * Copyright (c) 2003      Harikesavan Krishnan
+ * Copyright (c) 2003-2007 Stony Brook University
+ * Copyright (c) 2003-2007 The Research Foundation of SUNY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "union.h"
+
+/*
+ * The inode cache is used with alloc_inode for both our inode info and the
+ * vfs inode.
+ */
+static struct kmem_cache *unionfs_inode_cachep;
+
+static void unionfs_read_inode(struct inode *inode)
+{
+       struct unionfs_inode_info *uinfo = UNIONFS_I(inode);
+       struct super_block *sb = inode->i_sb;
+       int nbytes;
+
+       unionfs_read_lock(sb);
+       /* wipe our private part only; the embedded vfs inode is untouched */
+       memset(uinfo, 0, offsetof(struct unionfs_inode_info, vfs_inode));
+       spin_lock_init(&uinfo->rdlock);
+       INIT_LIST_HEAD(&uinfo->readdircache);
+       uinfo->rdcount = 1;
+       uinfo->hashsize = -1;
+       uinfo->bstart = -1;
+       uinfo->bend = -1;
+       atomic_set(&uinfo->generation,
+                  atomic_read(&UNIONFS_SB(sb)->generation));
+       /* one lower-inode slot for each of the sbmax(sb) branches */
+       nbytes = sbmax(sb) * sizeof(struct inode *);
+       uinfo->lower_inodes = kzalloc(nbytes, GFP_KERNEL);
+       if (unlikely(uinfo->lower_inodes == NULL)) {
+               printk(KERN_CRIT "unionfs: no kernel memory when allocating "
+                      "lower-pointer array!\n");
+               BUG();
+       }
+
+       inode->i_mapping->a_ops = &unionfs_aops;
+       inode->i_fop = &unionfs_main_fops;
+       inode->i_op = &unionfs_main_iops;
+       inode->i_version++;
+
+       unionfs_read_unlock(sb);
+}
+
+/*
+ * we now define delete_inode, because there are two VFS paths that may
+ * destroy an inode: one of them calls clear inode before doing everything
+ * else that's needed, and the other is fine.  This way we truncate the inode
+ * size (and its pages) and then clear our own inode, which will do an iput
+ * on our and the lower inode.
+ *
+ * No need to lock sb info's rwsem.
+ */
+static void unionfs_delete_inode(struct inode *inode)
+{
+       struct address_space *pages = &inode->i_data;
+
+       i_size_write(inode, 0); /* as every other f/s seems to do */
+       if (pages->nrpages != 0)
+               truncate_inode_pages(pages, 0);
+       clear_inode(inode);
+}
+
+/*
+ * final actions when unmounting a file system
+ *
+ * No need to lock rwsem.
+ */
+static void unionfs_put_super(struct super_block *sb)
+{
+       struct unionfs_sb_info *sbinfo = UNIONFS_SB(sb);
+       int idx, last;
+       int leaked = 0;
+
+       if (!sbinfo)
+               return;
+
+       /* Every branchget must have been paired with a branchput by now. */
+       last = sbend(sb);
+       for (idx = sbstart(sb); idx <= last; idx++) {
+               int refs = branch_count(sb, idx);
+
+               if (likely(refs == 0))
+                       continue;
+               printk(KERN_CRIT
+                      "unionfs: branch %d has %d references left!\n",
+                      idx, refs);
+               leaked = 1;
+       }
+       BUG_ON(leaked);
+
+       sb->s_fs_info = NULL;
+       kfree(sbinfo->data);
+       kfree(sbinfo);
+}
+
+/*
+ * Since people use this to answer the "How big of a file can I write?"
+ * question, we report the size of the highest priority branch as the size of
+ * the union.
+ */
+static int unionfs_statfs(struct dentry *dentry, struct kstatfs *buf)
+{
+       struct super_block *sb = dentry->d_sb;
+       struct dentry *lower_root;
+       int err;
+
+       /* take the superblock read lock, then the dentry lock */
+       unionfs_read_lock(sb);
+       unionfs_lock_dentry(dentry);
+
+       if (unlikely(!__unionfs_d_revalidate_chain(dentry, NULL, false))) {
+               err = -ESTALE;
+               goto out;
+       }
+       unionfs_check_dentry(dentry);
+
+       /* query the lower dentry of our root, whichever dentry was passed */
+       lower_root = unionfs_lower_dentry(sb->s_root);
+       err = vfs_statfs(lower_root, buf);
+
+       /* report our own magic to avoid confusing user-level utils */
+       buf->f_type = UNIONFS_SUPER_MAGIC;
+
+       /*
+        * Our maximum file name length is shorter by a few bytes because
+        * any file name could potentially be whited-out.
+        *
+        * XXX: this restriction goes away with ODF.
+        */
+       buf->f_namelen -= UNIONFS_WHLEN;
+
+       /*
+        * Clear these two fields to avoid confusing user-land.
+        * XXX: is this still necessary?
+        */
+       memset(&buf->f_spare, 0, sizeof(buf->f_spare));
+       memset(&buf->f_fsid, 0, sizeof(__kernel_fsid_t));
+
+out:
+       unionfs_check_dentry(dentry);
+       unionfs_unlock_dentry(dentry);
+       unionfs_read_unlock(sb);
+       return err;
+}
+
+/* handle mode changing during remount */
+static noinline int do_remount_mode_option(char *optarg, int cur_branches,
+                                          struct unionfs_data *new_data,
+                                          struct path *new_lower_paths)
+{
+       struct nameidata nd;
+       char *mode_str;
+       int new_perms, pos;
+       int rc;
+
+       /* by now, optarg holds "BRANCH=MODE" */
+       mode_str = strchr(optarg, '=');
+       if (*optarg == '\0') {
+               printk(KERN_ERR
+                      "unionfs: no branch specified for mode change\n");
+               return -EINVAL;
+       }
+       if (mode_str == NULL) {
+               printk(KERN_ERR "unionfs: branch \"%s\" requires a mode\n",
+                      optarg);
+               return -EINVAL;
+       }
+
+       *mode_str++ = '\0';     /* split: optarg is now the branch name */
+       rc = parse_branch_mode(mode_str, &new_perms);
+       if (rc) {
+               printk(KERN_ERR "unionfs: invalid mode \"%s\" for \"%s\"\n",
+                      mode_str, optarg);
+               return rc;
+       }
+
+       /*
+        * Locate the branch whose mnt and dentry match the looked-up path.
+        * For now, this assumes that nothing has been mounted on top of
+        * this Unionfs stack.  Once we have /odf and cache-coherency
+        * resolved, we'll address the branch-path uniqueness.
+        */
+       rc = path_lookup(optarg, LOOKUP_FOLLOW, &nd);
+       if (rc) {
+               printk(KERN_ERR "unionfs: error accessing "
+                      "lower directory \"%s\" (error %d)\n",
+                      optarg, rc);
+               return rc;
+       }
+       for (pos = 0; pos < cur_branches; pos++)
+               if (new_lower_paths[pos].dentry == nd.dentry &&
+                   new_lower_paths[pos].mnt == nd.mnt)
+                       break;
+       path_release(&nd);      /* the lookup result is no longer needed */
+       if (pos == cur_branches) {
+               printk(KERN_ERR "unionfs: branch \"%s\" "
+                      "not found\n", optarg);
+               return -ENOENT;
+       }
+
+       /* set the mode of the existing branch */
+       /* (no warning when the new mode equals the old one) */
+       new_data[pos].branchperms = new_perms;
+       return 0;
+}
+
+/* handle branch deletion during remount */
+static noinline int do_remount_del_option(char *optarg, int cur_branches,
+                                         struct unionfs_data *new_data,
+                                         struct path *new_lower_paths)
+{
+       struct nameidata nd;
+       int pos, tail;
+       int rc;
+
+       /* optarg holds the name of the branch to delete */
+
+       /*
+        * Locate the branch whose mnt and dentry match the looked-up path.
+        * For now, this assumes that nothing has been mounted on top of
+        * this Unionfs stack.  Once we have /odf and cache-coherency
+        * resolved, we'll address the branch-path uniqueness.
+        */
+       rc = path_lookup(optarg, LOOKUP_FOLLOW, &nd);
+       if (rc) {
+               printk(KERN_ERR "unionfs: error accessing "
+                      "lower directory \"%s\" (error %d)\n",
+                      optarg, rc);
+               return rc;
+       }
+       /* pos == cur_branches after the loop means no branch matched */
+       for (pos = 0; pos < cur_branches; pos++)
+               if (new_lower_paths[pos].dentry == nd.dentry &&
+                   new_lower_paths[pos].mnt == nd.mnt)
+                       break;
+       path_release(&nd);      /* the lookup result is no longer needed */
+       if (pos == cur_branches) {
+               printk(KERN_ERR "unionfs: branch \"%s\" "
+                      "not found\n", optarg);
+               return -ENOENT;
+       }
+
+       /*
+        * Refuse to delete a branch that still has open files on it: the
+        * open_files counter in its unionfs_data entry must be zero.
+        */
+       if (atomic_read(&new_data[pos].open_files) > 0)
+               return -EBUSY;
+
+       /*
+        * Delete the branch: drop the reference held on its path, then
+        * close the gap by shifting every entry after "pos" in new_data
+        * and new_lower_paths one slot to the left.  The caller adjusts
+        * its branch count.
+        */
+       pathput(&new_lower_paths[pos]);
+
+       tail = cur_branches - 1 - pos;  /* entries that follow "pos" */
+       if (tail > 0) {
+               /* tail == 0 means we deleted the last branch: easy */
+               memmove(&new_data[pos], &new_data[pos + 1],
+                       tail * sizeof(struct unionfs_data));
+               memmove(&new_lower_paths[pos], &new_lower_paths[pos + 1],
+                       tail * sizeof(struct path));
+       }
+
+       return 0;
+}
+
+/* handle branch insertion during remount */
+static noinline int do_remount_add_option(char *optarg, int cur_branches,
+                                         struct unionfs_data *new_data,
+                                         struct path *new_lower_paths,
+                                         int *high_branch_id)
+{
+       int err = -EINVAL;
+       int perms;
+       int idx = 0;            /* default: insert at beginning */
+       char *new_branch, *modename = NULL;
+       struct nameidata nd;
+
+       /*
+        * optarg can be of several forms:
+        *
+        * /bar:/foo            insert /foo before /bar
+        * /bar:/foo=ro         insert /foo in ro mode before /bar
+        * /foo                 insert /foo in the beginning (prepend)
+        * :/foo                insert /foo at the end (append)
+        */
+       if (*optarg == ':') {   /* append? */
+               new_branch = optarg + 1; /* skip ':' */
+               idx = cur_branches;
+               goto found_insertion_point;
+       }
+       new_branch = strchr(optarg, ':');
+       if (!new_branch) {      /* prepend? */
+               new_branch = optarg;
+               goto found_insertion_point;
+       }
+       *new_branch++ = '\0';   /* holds path+mode of new branch */
+
+       /*
+        * Find matching branch index.  For now, this assumes that nothing
+        * has been mounted on top of this Unionfs stack.  Once we have /odf
+        * and cache-coherency resolved, we'll address the branch-path
+        * uniqueness.
+        */
+       err = path_lookup(optarg, LOOKUP_FOLLOW, &nd);
+       if (err) {
+               printk(KERN_ERR "unionfs: error accessing "
+                      "lower directory \"%s\" (error %d)\n",
+                      optarg, err);
+               goto out;
+       }
+       for (idx = 0; idx < cur_branches; idx++)
+               if (nd.mnt == new_lower_paths[idx].mnt &&
+                   nd.dentry == new_lower_paths[idx].dentry)
+                       break;
+       path_release(&nd);      /* no longer needed */
+       if (idx == cur_branches) {
+               printk(KERN_ERR "unionfs: branch \"%s\" "
+                      "not found\n", optarg);
+               err = -ENOENT;
+               goto out;
+       }
+
+       /*
+        * At this point idx will hold the index where the new branch should
+        * be inserted before.
+        */
+found_insertion_point:
+       /* find the mode for the new branch */
+       if (new_branch)
+               modename = strchr(new_branch, '=');
+       if (modename)
+               *modename++ = '\0';
+       if (!new_branch || !*new_branch) {
+               printk(KERN_ERR "unionfs: null new branch\n");
+               err = -EINVAL;
+               goto out;
+       }
+       err = parse_branch_mode(modename, &perms);
+       if (err) {
+               printk(KERN_ERR "unionfs: invalid mode \"%s\" for "
+                      "branch \"%s\"\n", modename, new_branch);
+               goto out;
+       }
+       err = path_lookup(new_branch, LOOKUP_FOLLOW, &nd);
+       if (err) {
+               printk(KERN_ERR "unionfs: error accessing "
+                      "lower directory \"%s\" (error %d)\n",
+                      new_branch, err);
+               goto out;
+       }
+       /*
+        * It's probably safe to check_mode the new branch to insert.  Note:
+        * we don't allow inserting branches which are unionfs's by
+        * themselves (check_branch returns EINVAL in that case).  This is
+        * because this code base doesn't support stacking unionfs: the ODF
+        * code base supports that correctly.
+        */
+       err = check_branch(&nd);
+       if (err) {
+               printk(KERN_ERR "unionfs: lower directory "
+                      "\"%s\" is not a valid branch\n", new_branch);
+               path_release(&nd);
+               goto out;
+       }
+
+       /*
+        * Now we have to insert the new branch.  But first, move the bits
+        * to make space for the new branch, if needed.  The caller adjusts
+        * its branch count afterwards.
+        * We don't release nd here; it's kept until umount/remount.
+        */
+       if (idx < cur_branches) {
+               /* if idx==cur_branches, we append: easy */
+               memmove(&new_data[idx+1], &new_data[idx],
+                       (cur_branches - idx) * sizeof(struct unionfs_data));
+               memmove(&new_lower_paths[idx+1], &new_lower_paths[idx],
+                       (cur_branches - idx) * sizeof(struct path));
+       }
+       new_lower_paths[idx].dentry = nd.dentry;
+       new_lower_paths[idx].mnt = nd.mnt;
+
+       new_data[idx].sb = nd.dentry->d_sb;
+       atomic_set(&new_data[idx].open_files, 0);
+       new_data[idx].branchperms = perms;
+       new_data[idx].branch_id = ++*high_branch_id; /* assign new branch ID */
+
+       err = 0;
+out:
+       return err;
+}
+
+
+/*
+ * Support branch management options on remount.
+ *
+ * See Documentation/filesystems/unionfs/ for details.
+ *
+ * @flags: numeric mount options
+ * @options: mount options string
+ *
+ * This function can rearrange a mounted union dynamically, adding and
+ * removing branches, including changing branch modes.  Clearly this has to
+ * be done safely and atomically.  Luckily, the VFS already calls this
+ * function with lock_super(sb) and lock_kernel() held, preventing
+ * concurrent mixing of new mounts, remounts, and unmounts.  Moreover,
+ * do_remount_sb(), our caller function, already called shrink_dcache_sb(sb)
+ * to purge dentries/inodes from our superblock, and also called
+ * fsync_super(sb) to purge any dirty pages.  So we're good.
+ *
+ * XXX: however, our remount code may also need to invalidate mapped pages
+ * so as to force them to be re-gotten from the (newly reconfigured) lower
+ * branches.  This has to wait for proper mmap and cache coherency support
+ * in the VFS.
+ *
+ */
+static int unionfs_remount_fs(struct super_block *sb, int *flags,
+                             char *options)
+{
+       int err = 0;
+       int i;
+       char *optionstmp, *tmp_to_free; /* kstrdup'ed of "options" */
+       char *optname;
+       int cur_branches = 0;   /* no. of current branches */
+       int new_branches = 0;   /* no. of branches actually left in the end */
+       int add_branches;       /* est. no. of branches to add */
+       int del_branches;       /* est. no. of branches to del */
+       int max_branches;       /* max possible no. of branches */
+       struct unionfs_data *new_data = NULL, *tmp_data = NULL;
+       struct path *new_lower_paths = NULL, *tmp_lower_paths = NULL;
+       struct inode **new_lower_inodes = NULL;
+       int new_high_branch_id; /* new high branch ID */
+       int size;               /* memory allocation size, temp var */
+       int old_ibstart, old_ibend;
+
+       unionfs_write_lock(sb);
+
+       /*
+        * The VFS will take care of "ro" and "rw" flags, and we can safely
+        * ignore MS_SILENT, but anything else left over is an error.  So we
+        * need to check if any other flags may have been passed (none are
+        * allowed/supported as of now).
+        */
+       if ((*flags & ~(MS_RDONLY | MS_SILENT)) != 0) {
+               printk(KERN_ERR
+                      "unionfs: remount flags 0x%x unsupported\n", *flags);
+               err = -EINVAL;
+               goto out_error;
+       }
+
+       /*
+        * If 'options' is NULL, it's probably because the user just changed
+        * the union to a "ro" or "rw" and the VFS took care of it.  So
+        * nothing to do and we're done.
+        */
+       if (!options || options[0] == '\0')
+               goto out_error;
+
+       /*
+        * Find out how many branches we will have in the end, counting
+        * "add" and "del" commands.  Copy the "options" string because
+        * strsep modifies the string and we need it later.
+        */
+       tmp_to_free = kstrdup(options, GFP_KERNEL);
+       optionstmp = tmp_to_free;
+       if (unlikely(!optionstmp)) {
+               err = -ENOMEM;
+               goto out_free;
+       }
+       cur_branches = sbmax(sb); /* current no. branches */
+       new_branches = sbmax(sb);
+       del_branches = 0;
+       add_branches = 0;
+       new_high_branch_id = sbhbid(sb); /* save current high_branch_id */
+       while ((optname = strsep(&optionstmp, ",")) != NULL) {
+               char *optarg;
+
+               if (!optname || !*optname)
+                       continue;
+
+               optarg = strchr(optname, '=');
+               if (optarg)
+                       *optarg++ = '\0';
+
+               if (!strcmp("add", optname))
+                       add_branches++;
+               else if (!strcmp("del", optname))
+                       del_branches++;
+       }
+       kfree(tmp_to_free);
+       /* after all changes, will we have at least one branch left? */
+       if ((new_branches + add_branches - del_branches) < 1) {
+               printk(KERN_ERR
+                      "unionfs: no branches left after remount\n");
+               err = -EINVAL;
+               goto out_free;
+       }
+
+       /*
+        * Since we haven't actually parsed all the add/del options, nor
+        * have we checked them for errors, we don't know for sure how many
+        * branches we will have after all changes have taken place.  In
+        * fact, the total number of branches left could be less than what
+        * we have now.  So we need to allocate space for a temporary
+        * placeholder that is at least as large as the maximum number of
+        * branches we *could* have, which is the current number plus all
+        * the additions.  Once we're done with these temp placeholders, we
+        * may have to re-allocate the final size, copy over from the temp,
+        * and then free the temps (done near the end of this function).
+        */
+       max_branches = cur_branches + add_branches;
+       /* allocate space for new pointers to lower dentry */
+       tmp_data = kcalloc(max_branches,
+                          sizeof(struct unionfs_data), GFP_KERNEL);
+       if (unlikely(!tmp_data)) {
+               err = -ENOMEM;
+               goto out_free;
+       }
+       /* allocate space for new pointers to lower paths */
+       tmp_lower_paths = kcalloc(max_branches,
+                                 sizeof(struct path), GFP_KERNEL);
+       if (unlikely(!tmp_lower_paths)) {
+               err = -ENOMEM;
+               goto out_free;
+       }
+       /* copy current info into new placeholders, incrementing refcnts */
+       memcpy(tmp_data, UNIONFS_SB(sb)->data,
+              cur_branches * sizeof(struct unionfs_data));
+       memcpy(tmp_lower_paths, UNIONFS_D(sb->s_root)->lower_paths,
+              cur_branches * sizeof(struct path));
+       for (i = 0; i < cur_branches; i++)
+               pathget(&tmp_lower_paths[i]); /* drop refs at end of fxn */
+
+       /*******************************************************************
+        * For each branch command, do path_lookup on the requested branch,
+        * and apply the change to a temp branch list.  To handle errors, we
+        * already dup'ed the old arrays (above), and increased the refcnts
+        * on various f/s objects.  So now we can do all the path_lookups
+        * and branch-management commands on the new arrays.  If it fails
+        * midway, we free the tmp arrays and *put all objects.  If we succeed,
+        * then we free old arrays and *put its objects, and then replace
+        * the arrays with the new tmp list (we may have to re-allocate the
+        * memory because the temp lists could have been larger than what we
+        * actually needed).
+        *******************************************************************/
+
+       while ((optname = strsep(&options, ",")) != NULL) {
+               char *optarg;
+
+               if (!optname || !*optname)
+                       continue;
+               /*
+                * At this stage optname holds a comma-delimited option, but
+                * without the commas.  Next, we need to break the string on
+                * the '=' symbol to separate CMD=ARG, where ARG itself can
+                * be KEY=VAL.  For example, in mode=/foo=rw, CMD is "mode",
+                * KEY is "/foo", and VAL is "rw".
+                */
+               optarg = strchr(optname, '=');
+               if (optarg)
+                       *optarg++ = '\0';
+               /* incgen remount option (instead of old ioctl) */
+               if (!strcmp("incgen", optname)) {
+                       err = 0;
+                       goto out_no_change;
+               }
+
+               /*
+                * All of our options take an argument now.  (Insert ones
+                * that don't above this check.)  So at this stage optname
+                * contains the CMD part and optarg contains the ARG part.
+                */
+               if (!optarg || !*optarg) {
+                       printk(KERN_ERR "unionfs: all remount options require "
+                              "an argument (%s)\n", optname);
+                       err = -EINVAL;
+                       goto out_release;
+               }
+
+               if (!strcmp("add", optname)) {
+                       err = do_remount_add_option(optarg, new_branches,
+                                                   tmp_data,
+                                                   tmp_lower_paths,
+                                                   &new_high_branch_id);
+                       if (err)
+                               goto out_release;
+                       new_branches++;
+                       if (new_branches > UNIONFS_MAX_BRANCHES) {
+                               printk(KERN_ERR "unionfs: command exceeds "
+                                      "%d branches\n", UNIONFS_MAX_BRANCHES);
+                               err = -E2BIG;
+                               goto out_release;
+                       }
+                       continue;
+               }
+               if (!strcmp("del", optname)) {
+                       err = do_remount_del_option(optarg, new_branches,
+                                                   tmp_data,
+                                                   tmp_lower_paths);
+                       if (err)
+                               goto out_release;
+                       new_branches--;
+                       continue;
+               }
+               if (!strcmp("mode", optname)) {
+                       err = do_remount_mode_option(optarg, new_branches,
+                                                    tmp_data,
+                                                    tmp_lower_paths);
+                       if (err)
+                               goto out_release;
+                       continue;
+               }
+
+               /*
+                * When you use "mount -o remount,ro", mount(8) will
+                * reportedly pass the original dirs= string from
+                * /proc/mounts.  So for now, we have to ignore dirs= and
+                * not consider it an error, unless we want to allow users
+                * to pass dirs= in remount.  Note that to allow the VFS to
+                * actually process the ro/rw remount options, we have to
+                * return 0 from this function.
+                */
+               if (!strcmp("dirs", optname)) {
+                       printk(KERN_WARNING
+                              "unionfs: remount ignoring option \"%s\"\n",
+                              optname);
+                       continue;
+               }
+
+               err = -EINVAL;
+               printk(KERN_ERR
+                      "unionfs: unrecognized option \"%s\"\n", optname);
+               goto out_release;
+       }
+
+out_no_change:
+
+       /******************************************************************
+        * WE'RE ALMOST DONE: check if leftmost branch might be read-only,
+        * see if we need to allocate a small-sized new vector, copy the
+        * vectors to their correct place, release the refcnt of the older
+        * ones, and return.  Also handle invalidating any pages that will
+        * have to be re-read.
+        *******************************************************************/
+
+       if (!(tmp_data[0].branchperms & MAY_WRITE)) {
+               printk(KERN_ERR "unionfs: leftmost branch cannot be read-only "
+                      "(use \"remount,ro\" to create a read-only union)\n");
+               err = -EINVAL;
+               goto out_release;
+       }
+
+       /* allocate space for new pointers to lower inodes */
+       new_lower_inodes = kcalloc(new_branches,
+                                  sizeof(struct inode *), GFP_KERNEL);
+       if (unlikely(!new_lower_inodes)) {
+               err = -ENOMEM;
+               goto out_release;
+       }
+       /* resize the branch data; on success krealloc consumed tmp_data */
+       size = new_branches * sizeof(struct unionfs_data);
+       new_data = krealloc(tmp_data, size, GFP_KERNEL);
+       if (unlikely(!new_data)) {
+               err = -ENOMEM;
+               goto out_release;
+       }
+       tmp_data = NULL;        /* now owned by new_data: avoid double free */
+       /* resize the lower paths last: nothing below this point can fail */
+       size = new_branches * sizeof(struct path);
+       new_lower_paths = krealloc(tmp_lower_paths, size, GFP_KERNEL);
+       if (unlikely(!new_lower_paths)) {
+               err = -ENOMEM;
+               goto out_release;
+       }
+       tmp_lower_paths = NULL; /* now owned by new_lower_paths */
+
+       /*
+        * OK, just before we actually put the new set of branches in place,
+        * we need to ensure that our own f/s has no dirty objects left.
+        * Luckily, do_remount_sb() already calls shrink_dcache_sb(sb) and
+        * fsync_super(sb), taking care of dentries, inodes, and dirty
+        * pages.  So all that's left is for us to invalidate any leftover
+        * (non-dirty) pages to ensure that they will be re-read from the
+        * new lower branches (and to support mmap).
+        */
+
+       /*
+        * Now we call drop_pagecache_sb() to invalidate all pages in this
+        * super.  This function calls invalidate_inode_pages(mapping),
+        * which calls invalidate_mapping_pages(): the latter, however, will
+        * not invalidate pages which are dirty, locked, under writeback, or
+        * mapped into page tables.  We shouldn't have to worry about dirty
+        * or under-writeback pages, because do_remount_sb() called
+        * fsync_super() which would not have returned until all dirty pages
+        * were flushed.
+        *
+        * But do we have to worry about locked pages?  Is there any chance
+        * that in here we'll get locked pages?
+        *
+        * XXX: what about pages mapped into pagetables?  Are these pages
+        * which user processes may have mmap(2)'ed?  If so, then we need to
+        * invalidate those too, no?  Maybe we'll have to write our own
+        * version of invalidate_mapping_pages() which also handled mapped
+        * pages.
+        *
+        * XXX: Alternatively, maybe we should call truncate_inode_pages(),
+        * which use two passes over the pages list, and will truncate all
+        * pages.
+        */
+       drop_pagecache_sb(sb);
+
+       /* copy new vectors into their correct place */
+       tmp_data = UNIONFS_SB(sb)->data;
+       UNIONFS_SB(sb)->data = new_data;
+       new_data = NULL;        /* so don't free good pointers below */
+       tmp_lower_paths = UNIONFS_D(sb->s_root)->lower_paths;
+       UNIONFS_D(sb->s_root)->lower_paths = new_lower_paths;
+       new_lower_paths = NULL; /* so don't free good pointers below */
+
+       /* update our unionfs_sb_info and root dentry index of last branch */
+       i = sbmax(sb);          /* save no. of branches to release at end */
+       sbend(sb) = new_branches - 1;
+       set_dbend(sb->s_root, new_branches - 1);
+       old_ibstart = ibstart(sb->s_root->d_inode);
+       old_ibend = ibend(sb->s_root->d_inode);
+       ibend(sb->s_root->d_inode) = new_branches - 1;
+       UNIONFS_D(sb->s_root)->bcount = new_branches;
+       new_branches = i; /* no. of branches to release below */
+
+       /*
+        * Update lower inodes: 3 steps
+        * 1. grab ref on all new lower inodes
+        */
+       for (i = dbstart(sb->s_root); i <= dbend(sb->s_root); i++) {
+               struct dentry *lower_dentry =
+                       unionfs_lower_dentry_idx(sb->s_root, i);
+               igrab(lower_dentry->d_inode);
+               new_lower_inodes[i] = lower_dentry->d_inode;
+       }
+       /* 2. release reference on all older lower inodes */
+       for (i = old_ibstart; i <= old_ibend; i++) {
+               iput(unionfs_lower_inode_idx(sb->s_root->d_inode, i));
+               unionfs_set_lower_inode_idx(sb->s_root->d_inode, i, NULL);
+       }
+       kfree(UNIONFS_I(sb->s_root->d_inode)->lower_inodes);
+       /* 3. update root dentry's inode to new lower_inodes array */
+       UNIONFS_I(sb->s_root->d_inode)->lower_inodes = new_lower_inodes;
+       new_lower_inodes = NULL;
+
+       /* maxbytes may have changed */
+       sb->s_maxbytes = unionfs_lower_super_idx(sb, 0)->s_maxbytes;
+       /* update high branch ID */
+       sbhbid(sb) = new_high_branch_id;
+
+       /* update our sb->generation for revalidating objects */
+       i = atomic_inc_return(&UNIONFS_SB(sb)->generation);
+       atomic_set(&UNIONFS_D(sb->s_root)->generation, i);
+       atomic_set(&UNIONFS_I(sb->s_root->d_inode)->generation, i);
+       if (!(*flags & MS_SILENT))
+               pr_info("unionfs: new generation number %d\n", i);
+       /* finally, update the root dentry's times */
+       unionfs_copy_attr_times(sb->s_root->d_inode);
+       err = 0;                /* reset to success */
+
+       /*
+        * The code above falls through to the next label, and releases the
+        * refcnts of the older ones (stored in tmp_*): if we fell through
+        * here, it means success.  However, if we jump directly to this
+        * label from any error above, then an error occurred after we
+        * grabbed various refcnts, and so we have to release the
+        * temporarily constructed structures.
+        */
+out_release:
+       /* no need to cleanup/release anything in tmp_data */
+       if (tmp_lower_paths)
+               for (i = 0; i < new_branches; i++)
+                       pathput(&tmp_lower_paths[i]);
+out_free:
+       kfree(tmp_lower_paths);
+       kfree(tmp_data);
+       kfree(new_lower_paths);
+       kfree(new_data);
+       kfree(new_lower_inodes);
+out_error:
+       unionfs_check_dentry(sb->s_root);
+       unionfs_write_unlock(sb);
+       return err;
+}
+
+/*
+ * Invoked from iput() once the inode's reference count has dropped to
+ * zero and the inode is no longer hashed.  Releases the unionfs-private
+ * state (cached readdir states, references on lower inodes, and the
+ * lower_inodes array) before the inode is destroyed and put on the inode
+ * free list.
+ *
+ * No need to lock sb info's rwsem.
+ */
+static void unionfs_clear_inode(struct inode *inode)
+{
+       struct unionfs_inode_info *info = UNIONFS_I(inode);
+       struct unionfs_dir_state *rdstate, *next;
+       struct inode *lower_inode;
+       int bindex, bstart, bend;
+
+       /* throw away every readdir state still cached on this inode */
+       list_for_each_entry_safe(rdstate, next, &info->readdircache, cache) {
+               list_del(&rdstate->cache);
+               free_rdstate(rdstate);
+       }
+
+       /*
+        * Drop the reference on each lower inode; it was taken by our
+        * read_inode when this inode was first set up.  Slots in the
+        * branch range may be empty, so skip those.
+        */
+       bstart = ibstart(inode);
+       bend = ibend(inode);
+       if (bstart >= 0) {
+               for (bindex = bstart; bindex <= bend; bindex++) {
+                       lower_inode = unionfs_lower_inode_idx(inode, bindex);
+                       if (lower_inode)
+                               iput(lower_inode);
+               }
+       }
+
+       /* finally release the array that held the lower inode pointers */
+       kfree(info->lower_inodes);
+       info->lower_inodes = NULL;
+}
+
+/*
+ * Allocate a unionfs inode container from the inode cache and return the
+ * vfs inode embedded in it.  Returns NULL if the allocation fails.
+ */
+static struct inode *unionfs_alloc_inode(struct super_block *sb)
+{
+       struct unionfs_inode_info *info =
+               kmem_cache_alloc(unionfs_inode_cachep, GFP_KERNEL);
+
+       if (unlikely(!info))
+               return NULL;
+
+       /* zero our private fields, i.e. everything before the vfs inode */
+       memset(info, 0, offsetof(struct unionfs_inode_info, vfs_inode));
+       info->vfs_inode.i_version = 1;
+
+       return &info->vfs_inode;
+}
+
+/* Give the unionfs inode container holding @inode back to the inode cache. */
+static void unionfs_destroy_inode(struct inode *inode)
+{
+       struct unionfs_inode_info *info = UNIONFS_I(inode);
+
+       kmem_cache_free(unionfs_inode_cachep, info);
+}
+
+/*
+ * unionfs inode cache constructor: performs the one-time initialization
+ * of the vfs inode embedded in each object of the cache.
+ */
+static void init_once(struct kmem_cache *cachep, void *obj)
+{
+       struct unionfs_inode_info *info = obj;
+
+       inode_init_once(&info->vfs_inode);
+}
+
+/*
+ * Create the cache from which unionfs inodes are allocated.
+ * Returns 0 on success, or -ENOMEM if the cache could not be created.
+ */
+int unionfs_init_inode_cache(void)
+{
+       unionfs_inode_cachep =
+               kmem_cache_create("unionfs_inode_cache",
+                                 sizeof(struct unionfs_inode_info), 0,
+                                 SLAB_RECLAIM_ACCOUNT, init_once);
+
+       if (unlikely(!unionfs_inode_cachep))
+               return -ENOMEM;
+
+       return 0;
+}
+
+/* unionfs inode cache destructor; does nothing if no cache was created */
+void unionfs_destroy_inode_cache(void)
+{
+       if (!unionfs_inode_cachep)
+               return;
+
+       kmem_cache_destroy(unionfs_inode_cachep);
+}
+
+/*
+ * Called when we have a dirty inode; right here we only throw out the
+ * parts of our readdir cache that are too old.
+ *
+ * @inode: unionfs inode whose cached readdir states are pruned
+ * @sync:  not used by this implementation
+ *
+ * Always returns 0.
+ *
+ * No need to grab sb info's rwsem.
+ */
+static int unionfs_write_inode(struct inode *inode, int sync)
+{
+       struct unionfs_inode_info *info = UNIONFS_I(inode);
+       struct list_head *pos, *n;
+       struct unionfs_dir_state *rdstate;
+
+       /* rdlock guards both the readdir cache list and rdcount */
+       spin_lock(&info->rdlock);
+       list_for_each_safe(pos, n, &info->readdircache) {
+               rdstate = list_entry(pos, struct unionfs_dir_state, cache);
+               /*
+                * We keep this list in LRU order, so stop at the first
+                * entry that has not expired yet.  Compare with
+                * time_after() instead of a plain '>' so that the test
+                * remains correct when the jiffies counter wraps around.
+                */
+               if (time_after(rdstate->access + RDCACHE_JIFFIES, jiffies))
+                       break;
+               info->rdcount--;
+               list_del(&rdstate->cache);
+               free_rdstate(rdstate);
+       }
+       spin_unlock(&info->rdlock);
+
+       return 0;
+}
+
+/*
+ * Used only in nfs, to kill any pending RPC tasks, so that subsequent
+ * code can actually succeed and won't leave tasks that need handling.
+ *
+ * When the unmount is forced, the request is passed on to every lower
+ * branch whose superblock provides an umount_begin operation.
+ */
+static void unionfs_umount_begin(struct vfsmount *mnt, int flags)
+{
+       struct super_block *sb, *lower_sb;
+       struct vfsmount *lower_mnt;
+       int bindex, first, last;
+
+       /* not MNT_FORCE'd: emulate the old behavior and do nothing */
+       if (!(flags & MNT_FORCE))
+               return;
+
+       sb = mnt->mnt_sb;
+       unionfs_read_lock(sb);
+
+       first = sbstart(sb);
+       last = sbend(sb);
+       for (bindex = first; bindex <= last; bindex++) {
+               lower_mnt = unionfs_lower_mnt_idx(sb->s_root, bindex);
+               lower_sb = unionfs_lower_super_idx(sb, bindex);
+
+               /* skip branches that cannot take the request */
+               if (!lower_mnt || !lower_sb || !lower_sb->s_op)
+                       continue;
+               if (!lower_sb->s_op->umount_begin)
+                       continue;
+
+               lower_sb->s_op->umount_begin(lower_mnt, flags);
+       }
+
+       unionfs_read_unlock(sb);
+}
+
+/*
+ * Write the unionfs mount options for @mnt into @m, in the form
+ * ",dirs=<path>=<rw|ro>[:<path>=<rw|ro>...]" with one entry per branch.
+ *
+ * Returns 0 on success, -ENOMEM if the scratch page cannot be allocated,
+ * or the error returned by d_path() for a branch.
+ */
+static int unionfs_show_options(struct seq_file *m, struct vfsmount *mnt)
+{
+       struct super_block *sb = mnt->mnt_sb;
+       struct dentry *root = sb->s_root;
+       const char *mode;
+       char *scratch;
+       char *branch_path;
+       int bindex, first, last;
+       int err = 0;
+
+       unionfs_read_lock(sb);
+       unionfs_lock_dentry(root);
+
+       /* one page of scratch space in which d_path() builds each path */
+       scratch = (char *) __get_free_page(GFP_KERNEL);
+       if (unlikely(!scratch)) {
+               err = -ENOMEM;
+               goto out;
+       }
+
+       first = sbstart(sb);
+       last = sbend(sb);
+
+       seq_printf(m, ",dirs=");
+       for (bindex = first; bindex <= last; bindex++) {
+               branch_path = d_path(unionfs_lower_dentry_idx(root, bindex),
+                                    unionfs_lower_mnt_idx(root, bindex),
+                                    scratch, PAGE_SIZE);
+               if (IS_ERR(branch_path)) {
+                       err = PTR_ERR(branch_path);
+                       break;
+               }
+
+               if (branchperms(sb, bindex) & MAY_WRITE)
+                       mode = "rw";
+               else
+                       mode = "ro";
+
+               seq_printf(m, "%s=%s", branch_path, mode);
+               /* branches are separated by ':', with none after the last */
+               if (bindex != last)
+                       seq_printf(m, ":");
+       }
+
+out:
+       free_page((unsigned long) scratch);
+       unionfs_unlock_dentry(root);
+       unionfs_read_unlock(sb);
+
+       return err;
+}
+
+/* Superblock operations table wiring up the unionfs handlers. */
+struct super_operations unionfs_sops = {
+       /* inode allocation and teardown */
+       .alloc_inode    = unionfs_alloc_inode,
+       .destroy_inode  = unionfs_destroy_inode,
+       .read_inode     = unionfs_read_inode,
+       .write_inode    = unionfs_write_inode,
+       .delete_inode   = unionfs_delete_inode,
+       .clear_inode    = unionfs_clear_inode,
+       /* superblock and mount handling */
+       .put_super      = unionfs_put_super,
+       .statfs         = unionfs_statfs,
+       .remount_fs     = unionfs_remount_fs,
+       .umount_begin   = unionfs_umount_begin,
+       .show_options   = unionfs_show_options,
+};
-- 
1.5.2.2

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to