On Tue, Dec 01, 2015 at 12:21:31AM +0100, Nicolai Stange wrote:
> Nothing prevents a dentry found by path lookup before a return of
> __debugfs_remove() to actually get opened after that return. Now, after
> the return of __debugfs_remove(), there are no guarantees whatsoever
> regarding the memory the corresponding inode's file_operations object
> had been kept in.
> 
> Since __debugfs_remove() is seldomly invoked, usually from module exit
> handlers only, the race is hard to trigger and the impact is very low.
> 
> A discussion of the problem outlined above as well as a suggested
> solution can be found in the (sub-)thread rooted at
> 
>   http://lkml.kernel.org/g/20130401203445.ga20...@zeniv.linux.org.uk
>   ("Yet another pipe related oops.")
> 
> Basically, Greg KH suggests to introduce an intermediate fops and
> Al Viro points out that a pointer to the original ones may be stored in
> ->d_fsdata.
> 
> Follow this line of reasoning:
> - Add SRCU as a reverse dependency of DEBUG_FS.
> - Introduce a srcu_struct object for the debugfs subsystem.
> - In debugfs_create_file(), store a pointer to the original
>   file_operations object in ->d_fsdata.
> - In __debugfs_remove(), clear out that dentry->d_fsdata member again.
>   debugfs_remove() and debugfs_remove_recursive() wait for a SRCU grace
>   period before returning to their caller.
> - Introduce an intermediate file_operations object named
>   "debugfs_proxy_file_operations". It's ->open() functions checks,
>   under the protection of a SRCU read lock, whether the "original"
>   file_operations pointed to by ->d_fsdata is still valid and if so,
>   tries to acquire a reference on the owning module. On success, it sets
>   the file object's ->f_op to the original file_operations and forwards
>   the ongoing open() call to the original ->open().
> - For clarity, rename the former debugfs_file_operations to
>   debugfs_noop_file_operations -- they are in no way canonical.
> 
> The choice of SRCU over "normal" RCU is justified by the fact, that the
> former may also be used to protect ->i_private data from going away
> during the execution of a file's readers and writers which may (and do)
> sleep.
> 
> Signed-off-by: Nicolai Stange <nicsta...@gmail.com>

One question below.  Other than that, looks good from an RCU perspective.

                                                Thanx, Paul

> ---
>  Applicable to the Linus tree.
>  The second of the two patches depends on the first one
> 
>  fs/debugfs/file.c       | 29 ++++++++++++++++++++++++++++-
>  fs/debugfs/inode.c      | 16 +++++++++++++---
>  include/linux/debugfs.h |  6 +++++-
>  lib/Kconfig.debug       |  1 +
>  4 files changed, 47 insertions(+), 5 deletions(-)
> 
> diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
> index d2ba12e..67df2c9 100644
> --- a/fs/debugfs/file.c
> +++ b/fs/debugfs/file.c
> @@ -35,13 +35,40 @@ static ssize_t default_write_file(struct file *file, 
> const char __user *buf,
>       return count;
>  }
> 
> -const struct file_operations debugfs_file_operations = {
> +const struct file_operations debugfs_noop_file_operations = {
>       .read =         default_read_file,
>       .write =        default_write_file,
>       .open =         simple_open,
>       .llseek =       noop_llseek,
>  };
> 
> +static int proxy_open(struct inode *inode, struct file *filp)
> +{
> +     struct dentry * const dentry = filp->f_path.dentry;
> +     const struct file_operations __rcu *rcu_real_fops;
> +     const struct file_operations *real_fops;
> +     int srcu_idx;
> +
> +     srcu_idx = srcu_read_lock(&debugfs_srcu);
> +     rcu_real_fops = (const struct file_operations __rcu *)dentry->d_fsdata;
> +     real_fops = srcu_dereference(rcu_real_fops, &debugfs_srcu);
> +     real_fops = fops_get(real_fops);
> +     srcu_read_unlock(&debugfs_srcu, srcu_idx);
> +
> +     if (!real_fops)
> +             return -ENOENT;
> +
> +     replace_fops(filp, real_fops);
> +     if (filp->f_op->open)
> +             return filp->f_op->open(inode, filp);
> +
> +     return 0;
> +}
> +
> +const struct file_operations debugfs_proxy_file_operations = {
> +     .open = proxy_open,
> +};
> +
>  static struct dentry *debugfs_create_mode(const char *name, umode_t mode,
>                                         struct dentry *parent, void *value,
>                                         const struct file_operations *fops,
> diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
> index b7fcc0d..8ae2e1a 100644
> --- a/fs/debugfs/inode.c
> +++ b/fs/debugfs/inode.c
> @@ -30,6 +30,8 @@
> 
>  #define DEBUGFS_DEFAULT_MODE 0700
> 
> +DEFINE_SRCU(debugfs_srcu);
> +
>  static struct vfsmount *debugfs_mount;
>  static int debugfs_mount_count;
>  static bool debugfs_registered;
> @@ -340,8 +342,12 @@ struct dentry *debugfs_create_file(const char *name, 
> umode_t mode,
>               return failed_creating(dentry);
> 
>       inode->i_mode = mode;
> -     inode->i_fop = fops ? fops : &debugfs_file_operations;
>       inode->i_private = data;
> +
> +     inode->i_fop = fops ? &debugfs_proxy_file_operations
> +             : &debugfs_noop_file_operations;
> +     rcu_assign_pointer(dentry->d_fsdata, (void *)fops);
> +
>       d_instantiate(dentry, inode);
>       fsnotify_create(d_inode(dentry->d_parent), dentry);
>       return end_creating(dentry);
> @@ -523,10 +529,12 @@ static int __debugfs_remove(struct dentry *dentry, 
> struct dentry *parent)
> 
>       if (simple_positive(dentry)) {
>               dget(dentry);
> -             if (d_is_dir(dentry))
> +             if (d_is_dir(dentry)) {
>                       ret = simple_rmdir(d_inode(parent), dentry);
> -             else
> +             } else {
>                       simple_unlink(d_inode(parent), dentry);
> +                     rcu_assign_pointer(dentry->d_fsdata, NULL);
> +             }
>               if (!ret)
>                       d_delete(dentry);
>               dput(dentry);
> @@ -565,6 +573,7 @@ void debugfs_remove(struct dentry *dentry)
>       mutex_unlock(&d_inode(parent)->i_mutex);
>       if (!ret)
>               simple_release_fs(&debugfs_mount, &debugfs_mount_count);
> +     synchronize_srcu(&debugfs_srcu);

So the reason that this is OK from a performance perspective is that
people removing large quantities of debugfs entries are supposed to
use debugfs_remove_recursive()?

If there are valid use cases with lots of debugfs_remove() calls
in quick succession, and if this results in excessive latency due
to lots of synchronize_srcu() calls, then one approach would be to
have a debugfs_remove_nowait() or some such that omitted the final
synchronize_srcu().  The last call could use debugfs_remove() to clean
things up.  There are of course a number of alternative approaches.

But if there is no performance issue with real workloads, then there
is of course no need to do anything.

>  }
>  EXPORT_SYMBOL_GPL(debugfs_remove);
> 
> @@ -642,6 +651,7 @@ void debugfs_remove_recursive(struct dentry *dentry)
>       if (!__debugfs_remove(child, parent))
>               simple_release_fs(&debugfs_mount, &debugfs_mount_count);
>       mutex_unlock(&d_inode(parent)->i_mutex);
> +     synchronize_srcu(&debugfs_srcu);
>  }
>  EXPORT_SYMBOL_GPL(debugfs_remove_recursive);
> 
> diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h
> index 19c066d..f8c7494 100644
> --- a/include/linux/debugfs.h
> +++ b/include/linux/debugfs.h
> @@ -19,6 +19,7 @@
>  #include <linux/seq_file.h>
> 
>  #include <linux/types.h>
> +#include <linux/srcu.h>
> 
>  struct device;
>  struct file_operations;
> @@ -44,7 +45,10 @@ extern struct dentry *arch_debugfs_dir;
>  #if defined(CONFIG_DEBUG_FS)
> 
>  /* declared over in file.c */
> -extern const struct file_operations debugfs_file_operations;
> +extern const struct file_operations debugfs_noop_file_operations;
> +extern const struct file_operations debugfs_proxy_file_operations;
> +
> +extern struct srcu_struct debugfs_srcu;
> 
>  struct dentry *debugfs_create_file(const char *name, umode_t mode,
>                                  struct dentry *parent, void *data,
> diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
> index 8c15b29..3929878 100644
> --- a/lib/Kconfig.debug
> +++ b/lib/Kconfig.debug
> @@ -257,6 +257,7 @@ config PAGE_OWNER
> 
>  config DEBUG_FS
>       bool "Debug Filesystem"
> +     select SRCU
>       help
>         debugfs is a virtual file system that kernel developers use to put
>         debugging files into.  Enable this option to be able to read and
> -- 
> 2.6.3
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to