Re: [PATCH] Btrfs: allow us to overcommit our enospc reservations TEST THIS PLEASE!!!

Josef Bacik Thu, 13 Oct 2011 05:57:58 -0700

On Wed, Oct 12, 2011 at 03:45:04PM -0500, Mitch Harder wrote:
> On Wed, Oct 12, 2011 at 12:50 PM, Josef Bacik <jo...@redhat.com> wrote:
> > On Tue, Oct 11, 2011 at 03:45:45PM -0500, Mitch Harder wrote:
> >> On Tue, Oct 11, 2011 at 3:01 PM, Josef Bacik <jo...@redhat.com> wrote:
> >> > On Tue, Oct 11, 2011 at 02:44:09PM -0500, Mitch Harder wrote:
> >> >> On Tue, Oct 11, 2011 at 2:00 PM, Josef Bacik <jo...@redhat.com> wrote:
> >> >> > On Tue, Oct 11, 2011 at 12:33:48PM -0500, Mitch Harder wrote:
> >> >> >> On Mon, Sep 26, 2011 at 4:22 PM, Josef Bacik <jo...@redhat.com> 
> >> >> >> wrote:
> >> >> >> >
> >> >> >> > go from taking around 45 minutes to 10 seconds on my freshly 
> >> >> >> > formatted 3 TiB
> >> >> >> > file system.  This doesn't seem to break my other enospc tests, 
> >> >> >> > but could really
> >> >> >> > use some more testing as this is a super scary change.  Thanks,
> >> >> >> >
> >> >> >>
> >> >> >> I've been testing Josef's git.kernel.org testing tree, and I've
> >> >> >> bisected an error down to this commit.
> >> >> >>
> >> >> >> I'm triggering the error using a removedirs benchmark in filebench
> >> >> >> with the following profile:
> >> >> >> load removedirs
> >> >> >> set $dir=/mnt/benchmark/filebench
> >> >> >> set $ndirs=400000
> >> >> >> run
> >> >> >>
> >> >> >
> >> >> > Ok try this one, it will write out more and harder, see if that 
> >> >> > helps.  Thanks,
> >> >> >
> >> >>
> >> >> Still running into BUG at fs/btrfs/inode.c:2176!
> >> >
> >> > How about this one?
> >> >
> >>
> >> Sorry, still getting the same bug.
> >>
> >> [  175.956273] kernel BUG at fs/btrfs/inode.c:2176!
> >
> > Ok I think I see what's happening, this patch replaces the previous one, 
> > let me
> > know how it goes.  Thanks,
> >
> 
> Getting a slightly different BUG this time:
>


Ok, it looks like I've fixed the original problem and now we're hitting a problem
with the free space cache.  This patch replaces the last one; it's all the
fixes up to now plus a new set of BUG_ON()'s to figure out which free space cache
inode is screwing us up.  Thanks,

Josef


diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index fc0de68..e595372 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3334,7 +3334,7 @@ out:
  * shrink metadata reservation for delalloc
  */
 static int shrink_delalloc(struct btrfs_trans_handle *trans,
-                          struct btrfs_root *root, u64 to_reclaim, int sync)
+                          struct btrfs_root *root, u64 to_reclaim, int retries)
 {
        struct btrfs_block_rsv *block_rsv;
        struct btrfs_space_info *space_info;
@@ -3365,12 +3365,10 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
        }
 
        max_reclaim = min(reserved, to_reclaim);
+       if (max_reclaim > (2 * 1024 * 1024))
+               nr_pages = max_reclaim >> PAGE_CACHE_SHIFT;
 
        while (loops < 1024) {
-               /* have the flusher threads jump in and do some IO */
-               smp_mb();
-               nr_pages = min_t(unsigned long, nr_pages,
-                      root->fs_info->delalloc_bytes >> PAGE_CACHE_SHIFT);
                writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages);
 
                spin_lock(&space_info->lock);
@@ -3384,14 +3382,22 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
                if (reserved == 0 || reclaimed >= max_reclaim)
                        break;
 
-               if (trans && trans->transaction->blocked)
+               if (trans)
                        return -EAGAIN;
 
-               time_left = schedule_timeout_interruptible(1);
+               if (!retries) {
+                       time_left = schedule_timeout_interruptible(1);
 
-               /* We were interrupted, exit */
-               if (time_left)
-                       break;
+                       /* We were interrupted, exit */
+                       if (time_left)
+                               break;
+               } else {
+                       /*
+                        * We've already done this song and dance once, let's
+                        * really wait for some work to get done.
+                        */
+                       btrfs_wait_ordered_extents(root, 0, 0);
+               }
 
                /* we've kicked the IO a few times, if anything has been freed,
                 * exit.  There is no sense in looping here for a long time
@@ -3399,15 +3405,13 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
                 * just too many writers without enough free space
                 */
 
-               if (loops > 3) {
+               if (!retries && loops > 3) {
                        smp_mb();
                        if (progress != space_info->reservation_progress)
                                break;
                }
 
        }
-       if (reclaimed < to_reclaim && !trans)
-               btrfs_wait_ordered_extents(root, 0, 0);
        return reclaimed >= to_reclaim;
 }
 
@@ -3552,7 +3556,7 @@ again:
         * We do synchronous shrinking since we don't actually unreserve
         * metadata until after the IO is completed.
         */
-       ret = shrink_delalloc(trans, root, num_bytes, 1);
+       ret = shrink_delalloc(trans, root, num_bytes, retries);
        if (ret < 0)
                goto out;
 
@@ -3568,17 +3572,6 @@ again:
                goto again;
        }
 
-       /*
-        * Not enough space to be reclaimed, don't bother committing the
-        * transaction.
-        */
-       spin_lock(&space_info->lock);
-       if (space_info->bytes_pinned < orig_bytes)
-               ret = -ENOSPC;
-       spin_unlock(&space_info->lock);
-       if (ret)
-               goto out;
-
        ret = -EAGAIN;
        if (trans)
                goto out;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index d6ba353..cb63904 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -782,7 +782,8 @@ static noinline int cow_file_range(struct inode *inode,
        struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
        int ret = 0;
 
-       BUG_ON(btrfs_is_free_space_inode(root, inode));
+       BUG_ON(root == root->fs_info->tree_root);
+       BUG_ON(BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID);
        trans = btrfs_join_transaction(root);
        BUG_ON(IS_ERR(trans));
        trans->block_rsv = &root->fs_info->delalloc_block_rsv;
@@ -2790,7 +2791,8 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
                return ERR_PTR(-ENOMEM);
        }
 
-       trans = btrfs_start_transaction(root, 0);
+       /* 1 for the orphan item */
+       trans = btrfs_start_transaction(root, 1);
        if (IS_ERR(trans)) {
                btrfs_free_path(path);
                root->fs_info->enospc_unlink = 0;
@@ -2901,6 +2903,11 @@ out:
                return ERR_PTR(err);
        }
 
+       ret = btrfs_block_rsv_migrate(trans->block_rsv,
+                                     &root->fs_info->global_block_rsv,
+                                     btrfs_calc_trans_metadata_size(root, 1));
+       BUG_ON(ret);
+
        trans->block_rsv = &root->fs_info->global_block_rsv;
        return trans;
 }
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] Btrfs: allow us to overcommit our enospc reservations TEST THIS PLEASE!!!

Reply via email to