Now that an rwsem is used by kernfs, take advantage of it to reduce lookup overhead.
If there are many lookups (possibly many negative ones), there can be a lot of overhead during path walks. To reduce lookup overhead, avoid allocating a new dentry where possible. To do this, stay in rcu-walk mode where possible and use the dentry cache handling of negative hashed dentries to avoid allocating (and freeing shortly after) new dentries on every negative lookup. Signed-off-by: Ian Kent <ra...@themaw.net> --- fs/kernfs/dir.c | 87 ++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 72 insertions(+), 15 deletions(-) diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 9b315f3b20ee..f4943329e578 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -1046,15 +1046,75 @@ static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags) { struct kernfs_node *kn; - if (flags & LOOKUP_RCU) + if (flags & LOOKUP_RCU) { + kn = kernfs_dentry_node(dentry); + if (!kn) { + /* Negative hashed dentry, tell the VFS to switch to + * ref-walk mode and call us again so that node + * existence can be checked. + */ + if (!d_unhashed(dentry)) + return -ECHILD; + + /* Negative unhashed dentry, this shouldn't happen + * because this case occurs in rcu-walk mode after + * dentry allocation which is followed by a call + * to ->lookup(). But if it does happen the dentry + * is surely invalid. + */ + return 0; + } + + /* Since the dentry is positive (we got the kernfs node) a + * kernfs node reference was held at the time. Now if the + * dentry reference count is still greater than 0 it's still + * positive so take a reference to the node to perform an + * active check. + */ + if (d_count(dentry) <= 0 || !atomic_inc_not_zero(&kn->count)) + return -ECHILD; + + /* The kernfs node reference count was greater than 0, if + * it's active continue in rcu-walk mode. + */ + if (kernfs_active_read(kn)) { + kernfs_put(kn); + return 1; + } + + /* Otherwise, just tell the VFS to switch to ref-walk mode + * and call us again so the kernfs node can be validated. 
+ */ + kernfs_put(kn); return -ECHILD; + } - /* Always perform fresh lookup for negatives */ - if (d_really_is_negative(dentry)) - goto out_bad_unlocked; + down_read(&kernfs_rwsem); kn = kernfs_dentry_node(dentry); - down_read(&kernfs_rwsem); + if (!kn) { + struct kernfs_node *parent; + + /* If the kernfs node can be found this is a stale negative + * hashed dentry so it must be discarded and the lookup redone. + */ + parent = kernfs_dentry_node(dentry->d_parent); + if (parent) { + const void *ns = NULL; + + if (kernfs_ns_enabled(parent)) + ns = kernfs_info(dentry->d_parent->d_sb)->ns; + kn = kernfs_find_ns(parent, dentry->d_name.name, ns); + if (kn) + goto out_bad; + } + + /* The kernfs node doesn't exist, leave the dentry negative + * and return success. + */ + goto out; + } + /* The kernfs node has been deactivated */ if (!kernfs_active_read(kn)) @@ -1072,12 +1132,11 @@ static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags) if (kn->parent && kernfs_ns_enabled(kn->parent) && kernfs_info(dentry->d_sb)->ns != kn->ns) goto out_bad; - +out: up_read(&kernfs_rwsem); return 1; out_bad: up_read(&kernfs_rwsem); -out_bad_unlocked: return 0; } @@ -1092,7 +1151,7 @@ static struct dentry *kernfs_iop_lookup(struct inode *dir, struct dentry *ret; struct kernfs_node *parent = dir->i_private; struct kernfs_node *kn; - struct inode *inode; + struct inode *inode = NULL; const void *ns = NULL; down_read(&kernfs_rwsem); @@ -1102,11 +1161,9 @@ static struct dentry *kernfs_iop_lookup(struct inode *dir, kn = kernfs_find_ns(parent, dentry->d_name.name, ns); - /* no such entry */ - if (!kn || !kernfs_active(kn)) { - ret = NULL; - goto out_unlock; - } + /* no such entry, retain as negative hashed dentry */ + if (!kn || !kernfs_active(kn)) + goto out_negative; /* attach dentry and inode */ inode = kernfs_get_inode(dir->i_sb, kn); @@ -1114,10 +1171,10 @@ static struct dentry *kernfs_iop_lookup(struct inode *dir, ret = ERR_PTR(-ENOMEM); goto out_unlock; } - +out_negative: 
/* instantiate and hash dentry */ ret = d_splice_alias(inode, dentry); - out_unlock: +out_unlock: up_read(&kernfs_rwsem); return ret; }