[PATCH 12/15] parallel lookups machinery, part 2

2016-04-15 Thread Al Viro
From: Al Viro <viro@zeniv.linux.org.uk>

We'll need to verify that there's neither a hashed nor in-lookup
dentry with desired parent/name before adding to in-lookup set.

One possible solution would be to hold the parent's ->d_lock through
both checks, but while the in-lookup set is relatively small at any
time, dcache is not.  And holding the parent's ->d_lock through
something like __d_lookup_rcu() would suck too badly.

So we leave the parent's ->d_lock alone, which means that we watch
out for the following scenario:
* we verify that there's no hashed match
* existing in-lookup match gets hashed by another process
* we verify that there's no in-lookup matches and decide
that everything's fine.

Solution: per-directory kinda-sorta seqlock, bumped around the times
we hash something that used to be in-lookup or move (and hash)
something in place of in-lookup.  Then the above would turn into
* read the counter
* do dcache lookup
* if no matches found, check for in-lookup matches
* if there had been none of those either, check if the
counter has changed; repeat if it has.

The "kinda-sorta" part is due to the fact that we don't have much spare
space in inode.  There is a spare word (shared with i_bdev/i_cdev/i_pipe),
so the counter part is not a problem, but spinlock is a different story.

We could use the parent's ->d_lock, and it would be less painful in
terms of contention, for __d_add() it would be rather inconvenient to
grab; we could do that (using lock_parent()), but...

Fortunately, we can get serialization on the counter itself, and it
might be a good idea in general; we can use cmpxchg() in a loop to
get from even to odd and smp_store_release() from odd to even.

This commit adds the counter and updating logics; the readers will be
added in the next commit.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/dcache.c        | 34 ++++++++++++++++++++++++++++++++--
 fs/inode.c |  1 +
 include/linux/fs.h |  1 +
 3 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/fs/dcache.c b/fs/dcache.c
index 5cea3cb..3959f18 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2361,6 +2361,22 @@ void d_rehash(struct dentry * entry)
 }
 EXPORT_SYMBOL(d_rehash);
 
+static inline unsigned start_dir_add(struct inode *dir)
+{
+
+   for (;;) {
+   unsigned n = dir->i_dir_seq;
+   if (!(n & 1) && cmpxchg(&dir->i_dir_seq, n, n + 1) == n)
+   return n;
+   cpu_relax();
+   }
+}
+
+static inline void end_dir_add(struct inode *dir, unsigned n)
+{
+   smp_store_release(&dir->i_dir_seq, n + 2);
+}
+
 void __d_not_in_lookup(struct dentry *dentry)
 {
dentry->d_flags &= ~DCACHE_PAR_LOOKUP;
@@ -2371,9 +2387,14 @@ void __d_not_in_lookup(struct dentry *dentry)
 
 static inline void __d_add(struct dentry *dentry, struct inode *inode)
 {
+   struct inode *dir = NULL;
+   unsigned n;
	spin_lock(&dentry->d_lock);
-   if (unlikely(dentry->d_flags & DCACHE_PAR_LOOKUP))
+   if (unlikely(dentry->d_flags & DCACHE_PAR_LOOKUP)) {
+   dir = dentry->d_parent->d_inode;
+   n = start_dir_add(dir);
__d_not_in_lookup(dentry);
+   }
if (inode) {
unsigned add_flags = d_flags_for_inode(inode);
		hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry);
@@ -2383,6 +2404,8 @@ static inline void __d_add(struct dentry *dentry, struct inode *inode)
__fsnotify_d_instantiate(dentry);
}
_d_rehash(dentry);
+   if (dir)
+   end_dir_add(dir, n);
	spin_unlock(&dentry->d_lock);
if (inode)
		spin_unlock(&inode->i_lock);
@@ -2612,6 +2635,8 @@ static void dentry_unlock_for_move(struct dentry *dentry, struct dentry *target)
 static void __d_move(struct dentry *dentry, struct dentry *target,
 bool exchange)
 {
+   struct inode *dir = NULL;
+   unsigned n;
if (!dentry->d_inode)
printk(KERN_WARNING "VFS: moving negative dcache entry\n");
 
@@ -2619,8 +2644,11 @@ static void __d_move(struct dentry *dentry, struct dentry *target,
BUG_ON(d_ancestor(target, dentry));
 
dentry_lock_for_move(dentry, target);
-   if (unlikely(target->d_flags & DCACHE_PAR_LOOKUP))
+   if (unlikely(target->d_flags & DCACHE_PAR_LOOKUP)) {
+   dir = target->d_parent->d_inode;
+   n = start_dir_add(dir);
__d_not_in_lookup(target);
+   }
 
	write_seqcount_begin(&dentry->d_seq);
	write_seqcount_begin_nested(&target->d_seq, DENTRY_D_LOCK_NESTED);
@@ -2670,6 +2698,8 @@ static void __d_move(struct dentry *dentry, struct dentry *target,
	write_seqcount_end(&target->d_seq);
	write_seqcount_end(&dentry->d_seq);
 
+   if (dir)
+   end_dir_add(dir, n);
dentry_unlock_for_move(dentry, target);
 }
 
diff --git a/fs/inode.c b/fs/inode.c
index 4202aac..4b884f7 100644
--- a/fs/inode.c
+++ b/fs/inode.c

[PATCH 12/15] parallel lookups machinery, part 2

2016-04-15 Thread Al Viro
From: Al Viro <viro@zeniv.linux.org.uk>

We'll need to verify that there's neither a hashed nor in-lookup
dentry with desired parent/name before adding to in-lookup set.

One possible solution would be to hold the parent's ->d_lock through
both checks, but while the in-lookup set is relatively small at any
time, dcache is not.  And holding the parent's ->d_lock through
something like __d_lookup_rcu() would suck too badly.

So we leave the parent's ->d_lock alone, which means that we watch
out for the following scenario:
* we verify that there's no hashed match
* existing in-lookup match gets hashed by another process
* we verify that there's no in-lookup matches and decide
that everything's fine.

Solution: per-directory kinda-sorta seqlock, bumped around the times
we hash something that used to be in-lookup or move (and hash)
something in place of in-lookup.  Then the above would turn into
* read the counter
* do dcache lookup
* if no matches found, check for in-lookup matches
* if there had been none of those either, check if the
counter has changed; repeat if it has.

The "kinda-sorta" part is due to the fact that we don't have much spare
space in inode.  There is a spare word (shared with i_bdev/i_cdev/i_pipe),
so the counter part is not a problem, but spinlock is a different story.

We could use the parent's ->d_lock, and it would be less painful in
terms of contention, for __d_add() it would be rather inconvenient to
grab; we could do that (using lock_parent()), but...

Fortunately, we can get serialization on the counter itself, and it
might be a good idea in general; we can use cmpxchg() in a loop to
get from even to odd and smp_store_release() from odd to even.

This commit adds the counter and updating logics; the readers will be
added in the next commit.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/dcache.c        | 34 ++++++++++++++++++++++++++++++++--
 fs/inode.c |  1 +
 include/linux/fs.h |  1 +
 3 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/fs/dcache.c b/fs/dcache.c
index 5cea3cb..3959f18 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2361,6 +2361,22 @@ void d_rehash(struct dentry * entry)
 }
 EXPORT_SYMBOL(d_rehash);
 
+static inline unsigned start_dir_add(struct inode *dir)
+{
+
+   for (;;) {
+   unsigned n = dir->i_dir_seq;
+   if (!(n & 1) && cmpxchg(&dir->i_dir_seq, n, n + 1) == n)
+   return n;
+   cpu_relax();
+   }
+}
+
+static inline void end_dir_add(struct inode *dir, unsigned n)
+{
+   smp_store_release(&dir->i_dir_seq, n + 2);
+}
+
 void __d_not_in_lookup(struct dentry *dentry)
 {
dentry->d_flags &= ~DCACHE_PAR_LOOKUP;
@@ -2371,9 +2387,14 @@ void __d_not_in_lookup(struct dentry *dentry)
 
 static inline void __d_add(struct dentry *dentry, struct inode *inode)
 {
+   struct inode *dir = NULL;
+   unsigned n;
	spin_lock(&dentry->d_lock);
-   if (unlikely(dentry->d_flags & DCACHE_PAR_LOOKUP))
+   if (unlikely(dentry->d_flags & DCACHE_PAR_LOOKUP)) {
+   dir = dentry->d_parent->d_inode;
+   n = start_dir_add(dir);
__d_not_in_lookup(dentry);
+   }
if (inode) {
unsigned add_flags = d_flags_for_inode(inode);
		hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry);
@@ -2383,6 +2404,8 @@ static inline void __d_add(struct dentry *dentry, struct inode *inode)
__fsnotify_d_instantiate(dentry);
}
_d_rehash(dentry);
+   if (dir)
+   end_dir_add(dir, n);
	spin_unlock(&dentry->d_lock);
if (inode)
		spin_unlock(&inode->i_lock);
@@ -2612,6 +2635,8 @@ static void dentry_unlock_for_move(struct dentry *dentry, struct dentry *target)
 static void __d_move(struct dentry *dentry, struct dentry *target,
 bool exchange)
 {
+   struct inode *dir = NULL;
+   unsigned n;
if (!dentry->d_inode)
printk(KERN_WARNING "VFS: moving negative dcache entry\n");
 
@@ -2619,8 +2644,11 @@ static void __d_move(struct dentry *dentry, struct dentry *target,
BUG_ON(d_ancestor(target, dentry));
 
dentry_lock_for_move(dentry, target);
-   if (unlikely(target->d_flags & DCACHE_PAR_LOOKUP))
+   if (unlikely(target->d_flags & DCACHE_PAR_LOOKUP)) {
+   dir = target->d_parent->d_inode;
+   n = start_dir_add(dir);
__d_not_in_lookup(target);
+   }
 
	write_seqcount_begin(&dentry->d_seq);
	write_seqcount_begin_nested(&target->d_seq, DENTRY_D_LOCK_NESTED);
@@ -2670,6 +2698,8 @@ static void __d_move(struct dentry *dentry, struct dentry *target,
	write_seqcount_end(&target->d_seq);
	write_seqcount_end(&dentry->d_seq);
 
+   if (dir)
+   end_dir_add(dir, n);
dentry_unlock_for_move(dentry, target);
 }
 
diff --git a/fs/inode.c b/fs/inode.c
index 4202aac..4b884f7 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -151,6 +151,7 @@ int