This is a better solution for the problem addressed in the following
commit:

    Btrfs: update commit root on snapshot creation after orphan cleanup
    (3821f348889e506efbd268cc8149e0ebfa47c4e5)

The previous solution wasn't the best because of 2 reasons:

    1) It added another full transaction commit, which is more expensive
       than just swapping the commit root with the root;

    2) If a reboot happened after the first transaction commit (the one
       that creates the snapshot) and before the second transaction commit,
       then we would end up with the same problem if a send using that
       snapshot was requested before the first transaction commit after
       the reboot.

This change addresses those 2 issues. The second issue is addressed by
switching the commit root in the dentry lookup VFS callback, which is
also called by the snapshot/subvol creation ioctl and performs orphan
cleanup if needed. Like the vfs, the ioctl locks the parent inode too,
preventing race issues between a dentry lookup and snapshot creation.

Cc: Alex Lyakas <alex.bt...@zadarastorage.com>
Signed-off-by: Filipe Manana <fdman...@suse.com>
---

V2: Updated commit message, as original second issue  was not correct.
    Removed redundant btrfs_orphan_cleanup() call in the snapshot creation
    ioctl, as it's performed by btrfs_lookup_dentry() which is called by
    the ioctl.

 fs/btrfs/inode.c | 36 ++++++++++++++++++++++++++++++++++++
 fs/btrfs/ioctl.c | 33 ---------------------------------
 2 files changed, 36 insertions(+), 33 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 1d5f0b3..4f35c6c 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -5227,6 +5227,42 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, 
struct dentry *dentry)
                        iput(inode);
                        inode = ERR_PTR(ret);
                }
+               /*
+                * If orphan cleanup did remove any orphans, it means the tree
+                * was modified and therefore the commit root is not the same as
+                * the current root anymore. This is a problem, because send
+                * uses the commit root and therefore can see inode items that
+                * don't exist in the current root anymore, and for example make
+                * calls to btrfs_iget, which will do tree lookups based on the
+                * current root and not on the commit root. Those lookups will
+                * fail, returning a -ESTALE error, and making send fail with
+                * that error. So make sure a send does not see any orphans we
+                * have just removed, and that it will see the same inodes
+                * regardless of whether a transaction commit happened before
+                * it started (meaning that the commit root will be the same as
+                * the current root) or not.
+                */
+               if (sub_root->node != sub_root->commit_root) {
+                       u64 sub_flags = btrfs_root_flags(&sub_root->root_item);
+
+                       if (sub_flags & BTRFS_ROOT_SUBVOL_RDONLY) {
+                               struct extent_buffer *eb;
+
+                               /*
+                                * Assert we can't have races between dentry
+                                * lookup called through the snapshot creation
+                                * ioctl and the VFS.
+                                */
+                               ASSERT(mutex_is_locked(&dir->i_mutex));
+
+                               down_write(&root->fs_info->commit_root_sem);
+                               eb = sub_root->commit_root;
+                               sub_root->commit_root =
+                                       btrfs_root_node(sub_root);
+                               up_write(&root->fs_info->commit_root_sem);
+                               free_extent_buffer(eb);
+                       }
+               }
        }
 
        return inode;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 2a30ac1..ef2e073 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -711,39 +711,6 @@ static int create_snapshot(struct btrfs_root *root, struct 
inode *dir,
        if (ret)
                goto fail;
 
-       ret = btrfs_orphan_cleanup(pending_snapshot->snap);
-       if (ret)
-               goto fail;
-
-       /*
-        * If orphan cleanup did remove any orphans, it means the tree was
-        * modified and therefore the commit root is not the same as the
-        * current root anymore. This is a problem, because send uses the
-        * commit root and therefore can see inode items that don't exist
-        * in the current root anymore, and for example make calls to
-        * btrfs_iget, which will do tree lookups based on the current root
-        * and not on the commit root. Those lookups will fail, returning a
-        * -ESTALE error, and making send fail with that error. So make sure
-        * a send does not see any orphans we have just removed, and that it
-        * will see the same inodes regardless of whether a transaction
-        * commit happened before it started (meaning that the commit root
-        * will be the same as the current root) or not.
-        */
-       if (readonly && pending_snapshot->snap->node !=
-           pending_snapshot->snap->commit_root) {
-               trans = btrfs_join_transaction(pending_snapshot->snap);
-               if (IS_ERR(trans) && PTR_ERR(trans) != -ENOENT) {
-                       ret = PTR_ERR(trans);
-                       goto fail;
-               }
-               if (!IS_ERR(trans)) {
-                       ret = btrfs_commit_transaction(trans,
-                                                      pending_snapshot->snap);
-                       if (ret)
-                               goto fail;
-               }
-       }
-
        inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry);
        if (IS_ERR(inode)) {
                ret = PTR_ERR(inode);
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to