On Fri, Sep 15, 2017 at 10:06:24AM +0200, Miklos Szeredi wrote:
> And, btw. I also hate all the hacks we need to do in the VFS for the
> sake of overlayfs.  It may actually end up all being removed and all
> the stacking moved to overlayfs; that's something I'd really like to
> look at.   All of these hacks are there because overlayfs lets the
> open file be owned by the underlying filesystem, which is good for
> performance and results in simpler code in overlayfs, but may not
> actually be worth it.

And here's the prototype.  Poof, d_real() is gone.

It definitely has got problems, but just maybe it can be made better than the
current mess.  It *does* solve the ro-rw inconsistency for normal reads (not
mmaps).

Thanks,
Miklos

---
 fs/overlayfs/Makefile    |    2 
 fs/overlayfs/file.c      |  167 +++++++++++++++++++++++++++++++++++++++++++++++
 fs/overlayfs/inode.c     |    1 
 fs/overlayfs/overlayfs.h |    5 +
 fs/overlayfs/super.c     |   65 ------------------
 5 files changed, 174 insertions(+), 66 deletions(-)

--- a/fs/overlayfs/Makefile
+++ b/fs/overlayfs/Makefile
@@ -4,4 +4,4 @@
 
 obj-$(CONFIG_OVERLAY_FS) += overlay.o
 
-overlay-objs := super.o namei.o util.o inode.o dir.o readdir.o copy_up.o
+overlay-objs := super.o namei.o util.o inode.o dir.o file.o readdir.o copy_up.o
--- /dev/null
+++ b/fs/overlayfs/file.c
@@ -0,0 +1,167 @@
+/*
+ * Copyright (C) 2017 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+#include <linux/cred.h>
+#include <linux/file.h>
+#include "overlayfs.h"
+
+static int ovl_check_append_only(struct inode *inode, int flag)
+{
+       /*
+        * This test was moot in vfs may_open() because overlay inode does
+        * not have the S_APPEND flag, so re-check on real upper inode
+        */
+       if (IS_APPEND(inode)) {
+               if  ((flag & O_ACCMODE) != O_RDONLY && !(flag & O_APPEND))
+                       return -EPERM;
+               if (flag & O_TRUNC)
+                       return -EPERM;
+       }
+
+       return 0;
+}
+
+static int ovl_open(struct inode *inode, struct file *file)
+{
+       struct dentry *dentry = file->f_path.dentry;
+       int err;
+
+       err = ovl_open_maybe_copy_up(dentry, file->f_flags);
+       if (!err)
+               err = ovl_check_append_only(d_inode(ovl_dentry_real(dentry)),
+                                           file->f_flags);
+
+       return err;
+}
+
+static rwf_t ovl_iocb_to_rwf(struct kiocb *iocb)
+{
+       int ifl = iocb->ki_flags;
+       rwf_t flags = 0;
+
+       if (ifl & IOCB_NOWAIT)
+               flags |= RWF_NOWAIT;
+       if (ifl & IOCB_HIPRI)
+               flags |= RWF_HIPRI;
+       if (ifl & IOCB_DSYNC)
+               flags |= RWF_DSYNC;
+       if (ifl & IOCB_SYNC)
+               flags |= RWF_SYNC;
+
+       return flags;
+}
+
+static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
+{
+       struct file *file = iocb->ki_filp;
+       struct path realpath;
+       struct file *realfile;
+       ssize_t ret;
+
+       ovl_path_real(file->f_path.dentry, &realpath);
+       realfile = dentry_open(&realpath, file->f_flags, current_cred());
+       if (IS_ERR(realfile))
+               return PTR_ERR(realfile);
+
+       ret = vfs_iter_read(realfile, iter, &iocb->ki_pos,
+                           ovl_iocb_to_rwf(iocb));
+
+       fput(realfile);
+
+       return ret;
+}
+
+static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
+{
+       struct file *file = iocb->ki_filp;
+       struct path realpath;
+       struct file *realfile;
+       ssize_t ret;
+
+       ovl_path_real(file->f_path.dentry, &realpath);
+       realfile = dentry_open(&realpath, file->f_flags, current_cred());
+       if (IS_ERR(realfile))
+               return PTR_ERR(realfile);
+
+       ret = vfs_iter_write(realfile, iter, &iocb->ki_pos,
+                            ovl_iocb_to_rwf(iocb));
+
+       fput(realfile);
+
+       return ret;
+}
+
+static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
+{
+       struct path realpath;
+       struct file *realfile;
+       int ret;
+
+       ovl_path_real(file->f_path.dentry, &realpath);
+       realfile = dentry_open(&realpath, file->f_flags, current_cred());
+       if (IS_ERR(realfile))
+               return PTR_ERR(realfile);
+
+       ret = vfs_fsync_range(realfile, start, end, datasync);
+
+       fput(realfile);
+
+       return ret;
+}
+
+static loff_t ovl_llseek(struct file *file, loff_t offset, int whence)
+{
+       struct inode *realinode = d_inode(ovl_dentry_real(file->f_path.dentry));
+
+       return generic_file_llseek_size(file, offset, whence,
+                                       realinode->i_sb->s_maxbytes,
+                                       i_size_read(realinode));
+}
+
+static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
+{
+       struct inode *inode = file_inode(file);
+       struct path realpath;
+       struct file *realfile;
+       struct inode *realinode;
+       int err;
+
+       ovl_path_real(file->f_path.dentry, &realpath);
+       realinode = d_inode(realpath.dentry);
+
+       realfile = dentry_open(&realpath, file->f_flags, current_cred());
+       if (IS_ERR(realfile))
+               return PTR_ERR(realfile);
+
+       err = -ENODEV;
+       if (!realfile->f_op->mmap)
+               goto out;
+
+       file->f_mapping = realfile->f_mapping;
+       if (inode->i_mapping == &inode->i_data)
+               inode->i_mapping = realinode->i_mapping;
+
+       err = -EBUSY;
+       if (inode->i_mapping != realinode->i_mapping)
+               goto out;
+
+       err = call_mmap(realfile, vma);
+out:
+       fput(realfile);
+
+       return err;
+}
+
+const struct file_operations ovl_file_operations = {
+       .open           = ovl_open,
+       .read_iter      = ovl_read_iter,
+       .write_iter     = ovl_write_iter,
+       .fsync          = ovl_fsync,
+       .llseek         = ovl_llseek,
+       .mmap           = ovl_mmap,
+};
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -436,6 +436,7 @@ static void ovl_fill_inode(struct inode
        switch (mode & S_IFMT) {
        case S_IFREG:
                inode->i_op = &ovl_file_inode_operations;
+               inode->i_fop = &ovl_file_operations;
                break;
 
        case S_IFDIR:
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -9,6 +9,8 @@
 
 #include <linux/kernel.h>
 #include <linux/uuid.h>
+#include <linux/xattr.h>
+#include <linux/fs.h>
 
 enum ovl_path_type {
        __OVL_PATH_UPPER        = (1 << 0),
@@ -309,6 +311,9 @@ int ovl_create_real(struct inode *dir, s
                    struct dentry *hardlink, bool debug);
 int ovl_cleanup(struct inode *dir, struct dentry *dentry);
 
+/* file.c */
+extern const struct file_operations ovl_file_operations;
+
 /* copy_up.c */
 int ovl_copy_up(struct dentry *dentry);
 int ovl_copy_up_flags(struct dentry *dentry, int flags);
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -52,69 +52,6 @@ static void ovl_dentry_release(struct de
        }
 }
 
-static int ovl_check_append_only(struct inode *inode, int flag)
-{
-       /*
-        * This test was moot in vfs may_open() because overlay inode does
-        * not have the S_APPEND flag, so re-check on real upper inode
-        */
-       if (IS_APPEND(inode)) {
-               if  ((flag & O_ACCMODE) != O_RDONLY && !(flag & O_APPEND))
-                       return -EPERM;
-               if (flag & O_TRUNC)
-                       return -EPERM;
-       }
-
-       return 0;
-}
-
-static struct dentry *ovl_d_real(struct dentry *dentry,
-                                const struct inode *inode,
-                                unsigned int open_flags, unsigned int flags)
-{
-       struct dentry *real;
-       int err;
-
-       if (flags & D_REAL_UPPER)
-               return ovl_dentry_upper(dentry);
-
-       if (!d_is_reg(dentry)) {
-               if (!inode || inode == d_inode(dentry))
-                       return dentry;
-               goto bug;
-       }
-
-       if (open_flags) {
-               err = ovl_open_maybe_copy_up(dentry, open_flags);
-               if (err)
-                       return ERR_PTR(err);
-       }
-
-       real = ovl_dentry_upper(dentry);
-       if (real && (!inode || inode == d_inode(real))) {
-               if (!inode) {
-                       err = ovl_check_append_only(d_inode(real), open_flags);
-                       if (err)
-                               return ERR_PTR(err);
-               }
-               return real;
-       }
-
-       real = ovl_dentry_lower(dentry);
-       if (!real)
-               goto bug;
-
-       /* Handle recursion */
-       real = d_real(real, inode, open_flags, 0);
-
-       if (!inode || inode == d_inode(real))
-               return real;
-bug:
-       WARN(1, "ovl_d_real(%pd4, %s:%lu): real dentry not found\n", dentry,
-            inode ? inode->i_sb->s_id : "NULL", inode ? inode->i_ino : 0);
-       return dentry;
-}
-
 static int ovl_dentry_revalidate(struct dentry *dentry, unsigned int flags)
 {
        struct ovl_entry *oe = dentry->d_fsdata;
@@ -158,12 +95,10 @@ static int ovl_dentry_weak_revalidate(st
 
 static const struct dentry_operations ovl_dentry_operations = {
        .d_release = ovl_dentry_release,
-       .d_real = ovl_d_real,
 };
 
 static const struct dentry_operations ovl_reval_dentry_operations = {
        .d_release = ovl_dentry_release,
-       .d_real = ovl_d_real,
        .d_revalidate = ovl_dentry_revalidate,
        .d_weak_revalidate = ovl_dentry_weak_revalidate,
 };

Reply via email to