cleaner_kthread() may run at any time, and when it does it calls
btrfs_delete_unused_bgs() to delete unused block groups. Because this work is
asynchronous, it may also result in a false ENOSPC error. Please see the race
window below:

               CPU1                           |             CPU2
                                              |
|-> btrfs_alloc_data_chunk_ondemand()         |-> cleaner_kthread()
    |-> do_chunk_alloc()                      |   |
    |   assume it returns ENOSPC, which means |   |
    |   btrfs_space_info is full and has no   |   |
    |   free space to satisfy data request.   |   |
    |                                         |   |-> btrfs_delete_unused_bgs()
    |                                         |   |    it will decrease btrfs_space_info
    |                                         |   |    total_bytes and make
    |                                         |   |    btrfs_space_info not full.
    |                                         |   |
In this case, we may get ENOSPC error, but btrfs_space_info is not full.

To fix this issue, in btrfs_alloc_data_chunk_ondemand(), if we need to call
do_chunk_alloc() to allocate a new chunk, we should block
btrfs_delete_unused_bgs(). So here we introduce a new struct rw_semaphore,
bg_delete_sem, to do this job.

Signed-off-by: Wang Xiaoguang <wangxg.f...@cn.fujitsu.com>
---
 fs/btrfs/ctree.h       |  1 +
 fs/btrfs/disk-io.c     |  1 +
 fs/btrfs/extent-tree.c | 40 ++++++++++++++++++++++++++++++++++------
 3 files changed, 36 insertions(+), 6 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 7eb2913..bf0751d 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -800,6 +800,7 @@ struct btrfs_fs_info {
        struct mutex cleaner_mutex;
        struct mutex chunk_mutex;
        struct mutex volume_mutex;
+       struct rw_semaphore bg_delete_sem;
 
        /*
         * this is taken to make sure we don't set block groups ro after
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 60ce119..65a1465 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2676,6 +2676,7 @@ int open_ctree(struct super_block *sb,
        mutex_init(&fs_info->ordered_operations_mutex);
        mutex_init(&fs_info->tree_log_mutex);
        mutex_init(&fs_info->chunk_mutex);
+       init_rwsem(&fs_info->bg_delete_sem);
        mutex_init(&fs_info->transaction_kthread_mutex);
        mutex_init(&fs_info->cleaner_mutex);
        mutex_init(&fs_info->volume_mutex);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index df8d756..d1f8638 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4111,6 +4111,7 @@ int btrfs_alloc_data_chunk_ondemand(struct inode *inode, u64 bytes)
        int ret = 0;
        int need_commit = 2;
        int have_pinned_space;
+       int have_bg_delete_sem = 0;
 
        /* make sure bytes are sectorsize aligned */
        bytes = ALIGN(bytes, root->sectorsize);
@@ -4121,8 +4122,11 @@ int btrfs_alloc_data_chunk_ondemand(struct inode *inode, u64 bytes)
        }
 
        data_sinfo = fs_info->data_sinfo;
-       if (!data_sinfo)
+       if (!data_sinfo) {
+               down_read(&root->fs_info->bg_delete_sem);
+               have_bg_delete_sem = 1;
                goto alloc;
+       }
 
 again:
        /* make sure we have enough space to handle the data first */
@@ -4134,10 +4138,21 @@ again:
        if (used + bytes > data_sinfo->total_bytes) {
                struct btrfs_trans_handle *trans;
 
+               spin_unlock(&data_sinfo->lock);
+               /*
+                * We may need to allocate new chunk, so we should block
+                * btrfs_delete_unused_bgs()
+                */
+               if (have_bg_delete_sem == 0) {
+                       down_read(&root->fs_info->bg_delete_sem);
+                       have_bg_delete_sem = 1;
+               }
+
                /*
                 * if we don't have enough free bytes in this space then we need
                 * to alloc a new chunk.
                 */
+               spin_lock(&data_sinfo->lock);
                if (!data_sinfo->full) {
                        u64 alloc_target;
 
@@ -4156,17 +4171,20 @@ alloc:
                         * the fs.
                         */
                        trans = btrfs_join_transaction(root);
-                       if (IS_ERR(trans))
+                       if (IS_ERR(trans)) {
+                               up_read(&root->fs_info->bg_delete_sem);
                                return PTR_ERR(trans);
+                       }
 
                        ret = do_chunk_alloc(trans, root->fs_info->extent_root,
                                             alloc_target,
                                             CHUNK_ALLOC_NO_FORCE);
                        btrfs_end_transaction(trans, root);
                        if (ret < 0) {
-                               if (ret != -ENOSPC)
+                               if (ret != -ENOSPC) {
+                                       up_read(&root->fs_info->bg_delete_sem);
                                        return ret;
-                               else {
+                               } else {
                                        have_pinned_space = 1;
                                        goto commit_trans;
                                }
@@ -4200,15 +4218,19 @@ commit_trans:
                        }
 
                        trans = btrfs_join_transaction(root);
-                       if (IS_ERR(trans))
+                       if (IS_ERR(trans)) {
+                               up_read(&root->fs_info->bg_delete_sem);
                                return PTR_ERR(trans);
+                       }
                        if (have_pinned_space >= 0 ||
                            test_bit(BTRFS_TRANS_HAVE_FREE_BGS,
                                     &trans->transaction->flags) ||
                            need_commit > 0) {
                                ret = btrfs_commit_transaction(trans, root);
-                               if (ret)
+                               if (ret) {
+                                       up_read(&root->fs_info->bg_delete_sem);
                                        return ret;
+                               }
                                /*
                                 * The cleaner kthread might still be doing iput
                                 * operations. Wait for it to finish so that
@@ -4225,6 +4247,7 @@ commit_trans:
                trace_btrfs_space_reservation(root->fs_info,
                                              "space_info:enospc",
                                              data_sinfo->flags, bytes, 1);
+               up_read(&root->fs_info->bg_delete_sem);
                return -ENOSPC;
        }
        data_sinfo->bytes_may_use += bytes;
@@ -4232,6 +4255,9 @@ commit_trans:
                                      data_sinfo->flags, bytes, 1);
        spin_unlock(&data_sinfo->lock);
 
+       if (have_bg_delete_sem == 1)
+               up_read(&root->fs_info->bg_delete_sem);
+
        return ret;
 }
 
@@ -10594,6 +10620,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
                spin_unlock(&fs_info->unused_bgs_lock);
 
                mutex_lock(&fs_info->delete_unused_bgs_mutex);
+               down_write(&root->fs_info->bg_delete_sem);
 
                /* Don't want to race with allocators so take the groups_sem */
                down_write(&space_info->groups_sem);
@@ -10721,6 +10748,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
 end_trans:
                btrfs_end_transaction(trans, root);
 next:
+               up_write(&root->fs_info->bg_delete_sem);
                mutex_unlock(&fs_info->delete_unused_bgs_mutex);
                btrfs_put_block_group(block_group);
                spin_lock(&fs_info->unused_bgs_lock);
-- 
2.9.0



--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to