On Wed, Oct 12, 2011 at 03:45:04PM -0500, Mitch Harder wrote: > On Wed, Oct 12, 2011 at 12:50 PM, Josef Bacik <jo...@redhat.com> wrote: > > On Tue, Oct 11, 2011 at 03:45:45PM -0500, Mitch Harder wrote: > >> On Tue, Oct 11, 2011 at 3:01 PM, Josef Bacik <jo...@redhat.com> wrote: > >> > On Tue, Oct 11, 2011 at 02:44:09PM -0500, Mitch Harder wrote: > >> >> On Tue, Oct 11, 2011 at 2:00 PM, Josef Bacik <jo...@redhat.com> wrote: > >> >> > On Tue, Oct 11, 2011 at 12:33:48PM -0500, Mitch Harder wrote: > >> >> >> On Mon, Sep 26, 2011 at 4:22 PM, Josef Bacik <jo...@redhat.com> > >> >> >> wrote: > >> >> >> > > >> >> >> > go from taking around 45 minutes to 10 seconds on my freshly > >> >> >> > formatted 3 TiB > >> >> >> > file system. This doesn't seem to break my other enospc tests, > >> >> >> > but could really > >> >> >> > use some more testing as this is a super scary change. Thanks, > >> >> >> > > >> >> >> > >> >> >> I've been testing Josef's git.kernel.org testing tree, and I've > >> >> >> bisected an error down to this commit. > >> >> >> > >> >> >> I'm triggering the error using a removedirs benchmark in filebench > >> >> >> with the following profile: > >> >> >> load removedirs > >> >> >> set $dir=/mnt/benchmark/filebench > >> >> >> set $ndirs=400000 > >> >> >> run > >> >> >> > >> >> > > >> >> > Ok try this one, it will write out more and harder, see if that > >> >> > helps. Thanks, > >> >> > > >> >> > >> >> Still running into BUG at fs/btrfs/inode.c:2176! > >> > > >> > How about this one? > >> > > >> > >> Sorry, still getting the same bug. > >> > >> [ 175.956273] kernel BUG at fs/btrfs/inode.c:2176! > > > > Ok I think I see what's happening, this patch replaces the previous one, > > let me > > know how it goes. Thanks, > > > > Getting a slightly different BUG this time: >
Ok looks like I've fixed the original problem and now we're hitting a problem with the free space cache. This patch will replace the last one; it's all the fixes up to now and a new set of BUG_ON()'s to figure out which free space cache inode is screwing us up. Thanks, Josef diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index fc0de68..e595372 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3334,7 +3334,7 @@ out: * shrink metadata reservation for delalloc */ static int shrink_delalloc(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 to_reclaim, int sync) + struct btrfs_root *root, u64 to_reclaim, int retries) { struct btrfs_block_rsv *block_rsv; struct btrfs_space_info *space_info; @@ -3365,12 +3365,10 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans, } max_reclaim = min(reserved, to_reclaim); + if (max_reclaim > (2 * 1024 * 1024)) + nr_pages = max_reclaim >> PAGE_CACHE_SHIFT; while (loops < 1024) { - /* have the flusher threads jump in and do some IO */ - smp_mb(); - nr_pages = min_t(unsigned long, nr_pages, - root->fs_info->delalloc_bytes >> PAGE_CACHE_SHIFT); writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages); spin_lock(&space_info->lock); @@ -3384,14 +3382,22 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans, if (reserved == 0 || reclaimed >= max_reclaim) break; - if (trans && trans->transaction->blocked) + if (trans) return -EAGAIN; - time_left = schedule_timeout_interruptible(1); + if (!retries) { + time_left = schedule_timeout_interruptible(1); - /* We were interrupted, exit */ - if (time_left) - break; + /* We were interrupted, exit */ + if (time_left) + break; + } else { + /* + * We've already done this song and dance once, let's + * really wait for some work to get done. + */ + btrfs_wait_ordered_extents(root, 0, 0); + } /* we've kicked the IO a few times, if anything has been freed, * exit. 
There is no sense in looping here for a long time @@ -3399,15 +3405,13 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans, * just too many writers without enough free space */ - if (loops > 3) { + if (!retries && loops > 3) { smp_mb(); if (progress != space_info->reservation_progress) break; } } - if (reclaimed < to_reclaim && !trans) - btrfs_wait_ordered_extents(root, 0, 0); return reclaimed >= to_reclaim; } @@ -3552,7 +3556,7 @@ again: * We do synchronous shrinking since we don't actually unreserve * metadata until after the IO is completed. */ - ret = shrink_delalloc(trans, root, num_bytes, 1); + ret = shrink_delalloc(trans, root, num_bytes, retries); if (ret < 0) goto out; @@ -3568,17 +3572,6 @@ again: goto again; } - /* - * Not enough space to be reclaimed, don't bother committing the - * transaction. - */ - spin_lock(&space_info->lock); - if (space_info->bytes_pinned < orig_bytes) - ret = -ENOSPC; - spin_unlock(&space_info->lock); - if (ret) - goto out; - ret = -EAGAIN; if (trans) goto out; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d6ba353..cb63904 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -782,7 +782,8 @@ static noinline int cow_file_range(struct inode *inode, struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; int ret = 0; - BUG_ON(btrfs_is_free_space_inode(root, inode)); + BUG_ON(root == root->fs_info->tree_root); + BUG_ON(BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID); trans = btrfs_join_transaction(root); BUG_ON(IS_ERR(trans)); trans->block_rsv = &root->fs_info->delalloc_block_rsv; @@ -2790,7 +2791,8 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir, return ERR_PTR(-ENOMEM); } - trans = btrfs_start_transaction(root, 0); + /* 1 for the orphan item */ + trans = btrfs_start_transaction(root, 1); if (IS_ERR(trans)) { btrfs_free_path(path); root->fs_info->enospc_unlink = 0; @@ -2901,6 +2903,11 @@ out: return ERR_PTR(err); } + ret = 
btrfs_block_rsv_migrate(trans->block_rsv, + &root->fs_info->global_block_rsv, + btrfs_calc_trans_metadata_size(root, 1)); + BUG_ON(ret); + trans->block_rsv = &root->fs_info->global_block_rsv; return trans; } -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html