On 11/26/2018 05:47 PM, Nikolay Borisov wrote:
On 26.11.18 г. 11:07 ч., Anand Jain wrote:
The fs_info::device_list_mutex and fs_info::scrub_lock creates a
nested locks in btrfs_scrub_dev(). During the lock acquire the
hierarchy is fs_info::device_list_mutex and then fs_info::scrub_lock,
so following the same reverse order during unlock, that is
fs_info::scrub_lock and then fs_info::device_list_mutex.
Signed-off-by: Anand Jain <anand.j...@oracle.com>
---
fs/btrfs/scrub.c | 16 +++++++---------
1 file changed, 7 insertions(+), 9 deletions(-)
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 902819d3cf41..b1c2d1cdbd4b 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -3865,7 +3865,6 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64
devid, u64 start,
}
sctx->readonly = readonly;
dev->scrub_ctx = sctx;
- mutex_unlock(&fs_info->fs_devices->device_list_mutex);
/*
* checking @scrub_pause_req here, we can avoid
@@ -3875,15 +3874,14 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64
devid, u64 start,
atomic_inc(&fs_info->scrubs_running);
mutex_unlock(&fs_info->scrub_lock);
- if (!is_dev_replace) {
- /*
- * by holding device list mutex, we can
- * kick off writing super in log tree sync.
- */
- mutex_lock(&fs_info->fs_devices->device_list_mutex);
+ /*
+ * by holding device list mutex, we can kick off writing super in log
+ * tree sync.
+ */
+ if (!is_dev_replace)
ret = scrub_supers(sctx, dev);
- mutex_unlock(&fs_info->fs_devices->device_list_mutex);
- }
+
+ mutex_unlock(&fs_info->fs_devices->device_list_mutex);
Have you considered whether this change will have any negative impact
due to the fact that __scrtub_blocked_if_needed can go to sleep for
arbitrary time with device_list_mutex held now ?
You are right. I missed that point. The device_list_mutex must not be
blocked. In fact here we don't need the nested device_list_mutex and
scrub_lock at all. I have comeup with a new fix [1] below separating
them.
[1]
---------------------------------
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 902819d3cf41..db895ad23eda 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -3830,42 +3830,37 @@ int btrfs_scrub_dev(struct btrfs_fs_info
*fs_info, u64 devid, u64 start,
return -EROFS;
}
- mutex_lock(&fs_info->scrub_lock);
if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) ||
test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &dev->dev_state)) {
- mutex_unlock(&fs_info->scrub_lock);
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
return -EIO;
}
+ mutex_unlock(&fs_info->fs_devices->device_list_mutex);
btrfs_dev_replace_read_lock(&fs_info->dev_replace);
if (dev->scrub_ctx ||
(!is_dev_replace &&
btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))) {
btrfs_dev_replace_read_unlock(&fs_info->dev_replace);
- mutex_unlock(&fs_info->scrub_lock);
- mutex_unlock(&fs_info->fs_devices->device_list_mutex);
return -EINPROGRESS;
}
btrfs_dev_replace_read_unlock(&fs_info->dev_replace);
+ mutex_lock(&fs_info->scrub_lock);
ret = scrub_workers_get(fs_info, is_dev_replace);
if (ret) {
mutex_unlock(&fs_info->scrub_lock);
- mutex_unlock(&fs_info->fs_devices->device_list_mutex);
return ret;
}
sctx = scrub_setup_ctx(dev, is_dev_replace);
if (IS_ERR(sctx)) {
mutex_unlock(&fs_info->scrub_lock);
- mutex_unlock(&fs_info->fs_devices->device_list_mutex);
scrub_workers_put(fs_info);
return PTR_ERR(sctx);
}
sctx->readonly = readonly;
dev->scrub_ctx = sctx;
- mutex_unlock(&fs_info->fs_devices->device_list_mutex);
/*
* checking @scrub_pause_req here, we can avoid
------------------------------------------------
Will send v2.
Thanks, Anand
if (!ret)
ret = scrub_enumerate_chunks(sctx, dev, start, end);