Considering both end_dir_add() and d_alloc_parallel(), dir->i_dir_seq
wants acquire/release semantics. Therefore, micro-optimize for LL/SC
architectures: use a relaxed cmpxchg followed by a finer-grained
barrier to provide (load)-ACQUIRE ordering (L->S + L->L). This comes
at no additional cost on most x86, as sane TSO models make
smp_rmb()/smp_acquire__after_ctrl_dep() a nop.

Signed-off-by: Davidlohr Bueso <dbu...@suse.de>
---
Alternatively, I guess we could just use cmpxchg_acquire().

 fs/dcache.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/fs/dcache.c b/fs/dcache.c
index ea0485861d93..22738daccb9c 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2502,13 +2502,18 @@ EXPORT_SYMBOL(d_rehash);
 
 static inline unsigned start_dir_add(struct inode *dir)
 {
+       unsigned n;
 
        for (;;) {
-               unsigned n = dir->i_dir_seq;
-               if (!(n & 1) && cmpxchg(&dir->i_dir_seq, n, n + 1) == n)
-                       return n;
+               n = READ_ONCE(dir->i_dir_seq);
+               if (!(n & 1) && cmpxchg_relaxed(&dir->i_dir_seq, n, n + 1) == n)
+                       break;
                cpu_relax();
        }
+
+       /* create (load)-ACQUIRE ordering */
+       smp_acquire__after_ctrl_dep();
+       return n;
 }
 
 static inline void end_dir_add(struct inode *dir, unsigned n)
-- 
2.26.2

Reply via email to