On Thu, Nov 09 2017, Linus Torvalds wrote:

> On Wed, Nov 8, 2017 at 7:22 PM, NeilBrown <ne...@suse.com> wrote:
>> d_move() will call __d_drop() and then __d_rehash()
>> on the dentry being moved.  This creates a small window
>> when the dentry appears to be unhashed.  Many tests
>> of d_unhashed() are made under ->d_lock and so are safe
>> from racing with this window, but some aren't.
>> In particular, getcwd() calls d_unlinked() (which calls
>> d_unhashed()) without d_lock protection, so it can race.
>
> Hmm.
>
> I see what you're doing, but I don't necessarily agree.
>
> I would actually almost prefer that we simply change __d_move() itself.
>
> The problem is that __d_move() really wants to move the hashes things
> atomically, but instead of doing that it does a "unhash and then
> rehash".
>
> How nasty would it be to just expand the calls to __d_drop/__d_rehash
> into __d_move itself, and take both has list locks at the same time
> (with the usual ordering and checking if it's the same list, of
> course).
>
>                      Linus

something like this?
I can probably do better than "b1" and "b2".
I assume target must always be hashed ??
Do you like it enough for me to make it into a real patch?
I'd probably move hlist_bl_lock_two() into list_bl.h.
I'm not generally keen on open-coding subtle code, but maybe it is
justified here.

Thanks,
NeilBrown


diff --git a/fs/dcache.c b/fs/dcache.c
index f90141387f01..1a329fedf23c 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -472,6 +472,9 @@ static void dentry_lru_add(struct dentry *dentry)
  */
 void __d_drop(struct dentry *dentry)
 {
+       /* WARNING: any changes here should be reflected in __d_move()
+        * which open-codes some of this functionality.
+        */
        if (!d_unhashed(dentry)) {
                struct hlist_bl_head *b;
                /*
@@ -2380,6 +2383,9 @@ EXPORT_SYMBOL(d_delete);
 
 static void __d_rehash(struct dentry *entry)
 {
+       /* WARNING: any changes here should be reflected in __d_move()
+        * which open-codes some of this functionality.
+        */
        struct hlist_bl_head *b = d_hash(entry->d_name.hash);
        BUG_ON(!d_unhashed(entry));
        hlist_bl_lock(b);
@@ -2796,11 +2802,23 @@ static void dentry_unlock_for_move(struct dentry 
*dentry, struct dentry *target)
  * rename_lock, the i_mutex of the source and target directories,
  * and the sb->s_vfs_rename_mutex if they differ. See lock_rename().
  */
+static void hlist_bl_lock_two(struct hlist_bl_head *b1, struct  hlist_bl_head 
*b2)
+{
+       if (b1 && b1 < b2)
+               hlist_bl_lock(b1);
+       if (b2)
+               hlist_bl_lock(b2);
+       if (b1 > b2)
+               hlist_bl_lock(b1);
+}
+
 static void __d_move(struct dentry *dentry, struct dentry *target,
                     bool exchange)
 {
        struct inode *dir = NULL;
        unsigned n;
+       struct hlist_bl_head *b1, *b2;
+
        if (!dentry->d_inode)
                printk(KERN_WARNING "VFS: moving negative dcache entry\n");
 
@@ -2817,10 +2835,24 @@ static void __d_move(struct dentry *dentry, struct 
dentry *target,
        write_seqcount_begin(&dentry->d_seq);
        write_seqcount_begin_nested(&target->d_seq, DENTRY_D_LOCK_NESTED);
 
-       /* unhash both */
-       /* __d_drop does write_seqcount_barrier, but they're OK to nest. */
-       __d_drop(dentry);
-       __d_drop(target);
+       /* We want to unhash both, change names, then rehash one or both.
+        * If we use __d_drop() and __d_rehash() there will be a window
+        * when dentry appears to be d_unhashed() which can race with lockless
+        * checking.  So instead we open-code the important parts of __d_drop()
+        * and __d_rehash().
+        * @target must already be hashed, @dentry must be if @exchange.
+        */
+       BUG_ON(d_unhashed(dentry) && exchange);
+       BUG_ON(d_unhashed(target));
+
+       b1 = d_unhashed(dentry) ? NULL : d_hash(dentry->d_name.hash);
+       b2 = d_hash(target->d_name.hash);
+       hlist_bl_lock_two(b1, b2);
+       if (b1)
+               __hlist_bl_del(&dentry->d_hash);
+       __hlist_bl_del(&target->d_hash);
+       write_seqcount_invalidate(&dentry->d_seq);
+       write_seqcount_invalidate(&target->d_seq);
 
        /* Switch the names.. */
        if (exchange)
@@ -2829,9 +2861,14 @@ static void __d_move(struct dentry *dentry, struct 
dentry *target,
                copy_name(dentry, target);
 
        /* rehash in new place(s) */
-       __d_rehash(dentry);
+       hlist_bl_add_head_rcu(&dentry->d_hash, b2);
        if (exchange)
-               __d_rehash(target);
+               hlist_bl_add_head_rcu(&target->d_hash, b1);
+       else
+               target->d_hash.pprev = NULL;
+       if (b1 && b1 != b2)
+               hlist_bl_unlock(b1);
+       hlist_bl_unlock(b2);
 
        /* ... and switch them in the tree */
        if (IS_ROOT(dentry)) {

Attachment: signature.asc
Description: PGP signature

Reply via email to