On 11/26/2018 05:47 PM, Nikolay Borisov wrote:


On 26.11.18 г. 11:07 ч., Anand Jain wrote:
The fs_info::device_list_mutex and fs_info::scrub_lock are taken as
nested locks in btrfs_scrub_dev(). The acquisition order is
fs_info::device_list_mutex and then fs_info::scrub_lock, so release
them in the reverse order: fs_info::scrub_lock first and then
fs_info::device_list_mutex.

Signed-off-by: Anand Jain <anand.j...@oracle.com>
---
  fs/btrfs/scrub.c | 16 +++++++---------
  1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 902819d3cf41..b1c2d1cdbd4b 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -3865,7 +3865,6 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
        }
        sctx->readonly = readonly;
        dev->scrub_ctx = sctx;
-       mutex_unlock(&fs_info->fs_devices->device_list_mutex);
/*
         * checking @scrub_pause_req here, we can avoid
@@ -3875,15 +3874,14 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
        atomic_inc(&fs_info->scrubs_running);
        mutex_unlock(&fs_info->scrub_lock);
- if (!is_dev_replace) {
-               /*
-                * by holding device list mutex, we can
-                * kick off writing super in log tree sync.
-                */
-               mutex_lock(&fs_info->fs_devices->device_list_mutex);
+       /*
+        * by holding device list mutex, we can kick off writing super in log
+        * tree sync.
+        */
+       if (!is_dev_replace)
                ret = scrub_supers(sctx, dev);
-               mutex_unlock(&fs_info->fs_devices->device_list_mutex);
-       }
+
+       mutex_unlock(&fs_info->fs_devices->device_list_mutex);

Have you considered whether this change will have any negative impact,
given that __scrub_blocked_if_needed can now go to sleep for an
arbitrary amount of time with device_list_mutex held?

 You are right, I missed that point. The device_list_mutex must not be
 held while the task may block. In fact, we don't need to nest
 device_list_mutex and scrub_lock here at all. I have come up with a
 new fix [1] below that separates them.

[1]
---------------------------------
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 902819d3cf41..db895ad23eda 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -3830,42 +3830,37 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
                return -EROFS;
        }

-       mutex_lock(&fs_info->scrub_lock);
        if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) ||
            test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &dev->dev_state)) {
-               mutex_unlock(&fs_info->scrub_lock);
                mutex_unlock(&fs_info->fs_devices->device_list_mutex);
                return -EIO;
        }
+       mutex_unlock(&fs_info->fs_devices->device_list_mutex);

        btrfs_dev_replace_read_lock(&fs_info->dev_replace);
        if (dev->scrub_ctx ||
            (!is_dev_replace &&
             btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))) {
                btrfs_dev_replace_read_unlock(&fs_info->dev_replace);
-               mutex_unlock(&fs_info->scrub_lock);
-               mutex_unlock(&fs_info->fs_devices->device_list_mutex);
                return -EINPROGRESS;
        }
        btrfs_dev_replace_read_unlock(&fs_info->dev_replace);

+       mutex_lock(&fs_info->scrub_lock);
        ret = scrub_workers_get(fs_info, is_dev_replace);
        if (ret) {
                mutex_unlock(&fs_info->scrub_lock);
-               mutex_unlock(&fs_info->fs_devices->device_list_mutex);
                return ret;
        }

        sctx = scrub_setup_ctx(dev, is_dev_replace);
        if (IS_ERR(sctx)) {
                mutex_unlock(&fs_info->scrub_lock);
-               mutex_unlock(&fs_info->fs_devices->device_list_mutex);
                scrub_workers_put(fs_info);
                return PTR_ERR(sctx);
        }
        sctx->readonly = readonly;
        dev->scrub_ctx = sctx;
-       mutex_unlock(&fs_info->fs_devices->device_list_mutex);

        /*
         * checking @scrub_pause_req here, we can avoid
------------------------------------------------

Will send v2.

Thanks, Anand




if (!ret)
                ret = scrub_enumerate_chunks(sctx, dev, start, end);

Reply via email to