On Tue, Apr 18 2017, Johannes Berg wrote: > On Sun, 2017-04-16 at 11:51 +0200, Nicolai Stange wrote: >> >> +++ b/fs/debugfs/file.c >> @@ -53,6 +53,7 @@ const struct file_operations >> *debugfs_real_fops(const struct file *filp) >> { >> struct debugfs_fsdata *fsd = F_DENTRY(filp)->d_fsdata; >> >> + WARN_ON((unsigned long)fsd & >> DEBUGFS_FSDATA_IS_REAL_FOPS_BIT); >> return fsd->real_fops; > > I'm not a fan of BUG_ON(), but in this case, if you have a completely > bogus pointer here, and then you return fsd->real_fops which will be > even more bogus, and *then* you call a function from within it... that > seems like a recipe for disaster.
Hmm. Note that debugfs_real_fops() is provided for users outside of the debugfs core which encapsulate their file_operations in a larger struct and access this one through a container_of(debugfs_real_fops(dentry), ...). Now, what the above WARN_ON() really checks for is whether the debugfs_real_fops() has been called under a debugfs_file_get()/-_put() pair. The point of requiring this isn't protection ([1]) or anything, but merely to be able to streamline the implementation of debugfs_real_fops(): knowing that a debugfs_file_get() is active allows for the omission of the WARNed_ON case, namely that the fops are encoded in ->d_fsdata itself. > So either you could return some valid ops (perhaps > debugfs_noop_file_operations although those don't have .name or .poll, > so it doesn't cover everything), or you can just BUG_ON() here > directly, saving the incomprehensible crash later. The purpose of that WARN_ON() there was to turn a potentially incomprehensible crash into a comprehensible one ;) In order to avoid a new BUG_ON(), what about keeping the WARN_ON() as is and returning NULL instead of the garbage? That would crash current on first access and the earlier warning would hopefully give some clue? Thanks, Nicolai [1] "protection" in the sense of "protection against file removal". With the proposed [9/9] ("debugfs: free debugfs_fsdata instances"), the patch introducing occasional freeing of ->d_fsdata, an additional RCU read side section would be needed if debugfs_real_fops() were allowed to be called outside of a debugfs_file_get()/-_put() pair. >> EXPORT_SYMBOL_GPL(debugfs_real_fops); >> @@ -74,9 +75,35 @@ EXPORT_SYMBOL_GPL(debugfs_real_fops); >> */ >> int debugfs_file_get(struct dentry *dentry) >> { >> - struct debugfs_fsdata *fsd = dentry->d_fsdata; >> + struct debugfs_fsdata *fsd; >> + void *d_fsd; >> + >> + d_fsd = READ_ONCE(dentry->d_fsdata); >> + if (!((unsigned long)d_fsd & >> DEBUGFS_FSDATA_IS_REAL_FOPS_BIT)) { >> + fsd = d_fsd; >> + } else { >> + fsd = kmalloc(sizeof(*fsd), GFP_KERNEL); >> + if (!fsd) >> + return -ENOMEM; >> + >> + fsd->real_fops = (void *)((unsigned long)d_fsd & >> + ~DEBUGFS_FSDATA_IS_REAL_FOPS >> _BIT); >> + refcount_set(&fsd->active_users, 1); >> + init_completion(&fsd->active_users_drained); >> + if (cmpxchg(&dentry->d_fsdata, d_fsd, fsd) != d_fsd) >> { >> + kfree(fsd); >> + fsd = READ_ONCE(dentry->d_fsdata); >> + } >> + } >> >> - /* Avoid starvation of removers. */ >> + /* >> + * In case of a successful cmpxchg() above, this check is >> + * strictly necessary and must follow it, see the comment in >> + * __debugfs_remove_file(). >> + * OTOH, if the cmpxchg() hasn't been executed or wasn't >> + * successful, this serves the purpose of not starving >> + * removers. >> + */ >> if (d_unlinked(dentry)) >> return -EIO; >> >> @@ -98,7 +125,7 @@ EXPORT_SYMBOL_GPL(debugfs_file_get); >> */ >> void debugfs_file_put(struct dentry *dentry) >> { >> - struct debugfs_fsdata *fsd = dentry->d_fsdata; >> + struct debugfs_fsdata *fsd = READ_ONCE(dentry->d_fsdata); >> >> if (refcount_dec_and_test(&fsd->active_users)) >> complete(&fsd->active_users_drained); >> @@ -109,10 +136,11 @@ static int open_proxy_open(struct inode *inode, >> struct file *filp) >> { >> struct dentry *dentry = F_DENTRY(filp); >> const struct file_operations *real_fops = NULL; >> - int r = 0; >> + int r; >> >> - if (debugfs_file_get(dentry)) >> - return -ENOENT; >> + r = debugfs_file_get(dentry); >> + if (r) >> + return r == -EIO ? -ENOENT : r; >> >> real_fops = debugfs_real_fops(filp); >> real_fops = fops_get(real_fops); >> @@ -233,10 +261,11 @@ static int full_proxy_open(struct inode *inode, >> struct file *filp) >> struct dentry *dentry = F_DENTRY(filp); >> const struct file_operations *real_fops = NULL; >> struct file_operations *proxy_fops = NULL; >> - int r = 0; >> + int r; >> >> - if (debugfs_file_get(dentry)) >> - return -ENOENT; >> + r = debugfs_file_get(dentry); >> + if (r) >> + return r == -EIO ? -ENOENT : r; >> >> real_fops = debugfs_real_fops(filp); >> real_fops = fops_get(real_fops); >> diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c >> index 5550f11d60bd..2360c17ec00a 100644 >> --- a/fs/debugfs/inode.c >> +++ b/fs/debugfs/inode.c >> @@ -184,7 +184,10 @@ static const struct super_operations >> debugfs_super_operations = { >> >> static void debugfs_release_dentry(struct dentry *dentry) >> { >> - kfree(dentry->d_fsdata); >> + void *fsd = dentry->d_fsdata; >> + >> + if (!((unsigned long)fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT)) >> + kfree(dentry->d_fsdata); >> } >> >> static struct vfsmount *debugfs_automount(struct path *path) >> @@ -346,35 +349,25 @@ static struct dentry >> *__debugfs_create_file(const char *name, umode_t mode, >> { >> struct dentry *dentry; >> struct inode *inode; >> - struct debugfs_fsdata *fsd; >> - >> - fsd = kmalloc(sizeof(*fsd), GFP_KERNEL); >> - if (!fsd) >> - return NULL; >> >> if (!(mode & S_IFMT)) >> mode |= S_IFREG; >> BUG_ON(!S_ISREG(mode)); >> dentry = start_creating(name, parent); >> >> - if (IS_ERR(dentry)) { >> - kfree(fsd); >> + if (IS_ERR(dentry)) >> return NULL; >> - } >> >> inode = debugfs_get_inode(dentry->d_sb); >> - if (unlikely(!inode)) { >> - kfree(fsd); >> + if (unlikely(!inode)) >> return failed_creating(dentry); >> - } >> >> inode->i_mode = mode; >> inode->i_private = data; >> >> inode->i_fop = proxy_fops; >> - fsd->real_fops = real_fops; >> - refcount_set(&fsd->active_users, 1); >> - dentry->d_fsdata = fsd; >> + dentry->d_fsdata = (void *)((unsigned long)real_fops | >> + DEBUGFS_FSDATA_IS_REAL_FOPS_BIT); >> >> d_instantiate(dentry, inode); >> fsnotify_create(d_inode(dentry->d_parent), dentry); >> @@ -637,8 +630,17 @@ static void __debugfs_remove_file(struct dentry >> *dentry, struct dentry *parent) >> >> simple_unlink(d_inode(parent), dentry); >> d_delete(dentry); >> - fsd = dentry->d_fsdata; >> - init_completion(&fsd->active_users_drained); >> + >> + /* >> + * Paired with the closing smp_mb() implied by a successful >> + * cmpxchg() in debugfs_file_get(): either >> + * debugfs_file_get() must see a dead dentry or we must see >> a >> + * debugfs_fsdata instance at ->d_fsdata here (or both). >> + */ >> + smp_mb(); >> + fsd = READ_ONCE(dentry->d_fsdata); >> + if ((unsigned long)fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT) >> + return; >> if (!refcount_dec_and_test(&fsd->active_users)) >> wait_for_completion(&fsd->active_users_drained); >> } >> diff --git a/fs/debugfs/internal.h b/fs/debugfs/internal.h >> index 0eea99432840..cb1e8139c398 100644 >> --- a/fs/debugfs/internal.h >> +++ b/fs/debugfs/internal.h >> @@ -25,4 +25,12 @@ struct debugfs_fsdata { >> struct completion active_users_drained; >> }; >> >> +/* >> + * A dentry's ->d_fsdata either points to the real fops or to a >> + * dynamically allocated debugfs_fsdata instance. >> + * In order to distinguish between these two cases, a real fops >> + * pointer gets its lowest bit set. >> + */ >> +#define DEBUGFS_FSDATA_IS_REAL_FOPS_BIT BIT(0) >> + >> #endif /* _DEBUGFS_INTERNAL_H_ */