On thu, 27 Sep 2012 16:45:51 +0800, Miao Xie wrote: > In some places(such as: evicting inode), we just can not flush the reserved > space of delalloc, flushing the delayed directory index and delayed inode > is OK, but we don't try to flush those things and just go back when there is > no enough space to be reserved. This patch fixes this problem. > > We defined 3 types of the flush operations: NO_FLUSH, FLUSH_LIMIT and > FLUSH_ALL. > If we can in the transaction, we should not flush anything, or the deadlock > would happen, so use NO_FLUSH. If we flushing the reserved space of delalloc > would cause deadlock, use FLUSH_LIMIT. In the other cases, FLUSH_ALL is used, > and we will flush all things. > > Signed-off-by: Miao Xie <mi...@cn.fujitsu.com> > --- > This is based on btrfs-next tree.
Sorry, I forget to say that this patch is against: [PATCH] Btrfs: fix wrong calculation of the available space when reserving the space Both this patch and the above patch are based on btrfs-next tree. Thanks Miao > --- > fs/btrfs/ctree.h | 26 +++++++++----- > fs/btrfs/delayed-inode.c | 6 ++- > fs/btrfs/extent-tree.c | 85 ++++++++++++++++++--------------------------- > fs/btrfs/inode-map.c | 5 ++- > fs/btrfs/inode.c | 8 +++-- > fs/btrfs/relocation.c | 12 ++++-- > fs/btrfs/transaction.c | 30 +++++++--------- > fs/btrfs/transaction.h | 2 +- > 8 files changed, 85 insertions(+), 89 deletions(-) > > diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h > index dbb461f..cb59e9b 100644 > --- a/fs/btrfs/ctree.h > +++ b/fs/btrfs/ctree.h > @@ -2870,6 +2870,18 @@ int btrfs_remove_block_group(struct btrfs_trans_handle > *trans, > u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); > u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data); > void btrfs_clear_space_info_full(struct btrfs_fs_info *info); > + > +enum btrfs_reserve_flush_enum { > + /* If we are in the transaction, we can't flush anything.*/ > + BTRFS_RESERVE_NO_FLUSH, > + /* > + * Flushing delalloc may cause deadlock somewhere, in this > + * case, use FLUSH LIMIT > + */ > + BTRFS_RESERVE_FLUSH_LIMIT, > + BTRFS_RESERVE_FLUSH_ALL, > +}; > + > int btrfs_check_data_free_space(struct inode *inode, u64 bytes); > void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes); > void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, > @@ -2889,19 +2901,13 @@ struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct > btrfs_root *root, > void btrfs_free_block_rsv(struct btrfs_root *root, > struct btrfs_block_rsv *rsv); > int btrfs_block_rsv_add(struct btrfs_root *root, > - struct btrfs_block_rsv *block_rsv, > - u64 num_bytes); > -int btrfs_block_rsv_add_noflush(struct btrfs_root *root, > - struct btrfs_block_rsv *block_rsv, > - u64 num_bytes); > + struct btrfs_block_rsv *block_rsv, u64 num_bytes, > + enum btrfs_reserve_flush_enum flush); > int btrfs_block_rsv_check(struct btrfs_root *root, > struct btrfs_block_rsv *block_rsv, int min_factor); > int btrfs_block_rsv_refill(struct btrfs_root *root, > - struct btrfs_block_rsv *block_rsv, > - u64 min_reserved); > -int btrfs_block_rsv_refill_noflush(struct btrfs_root *root, > - struct btrfs_block_rsv *block_rsv, > - u64 min_reserved); > + struct btrfs_block_rsv *block_rsv, u64 min_reserved, > + enum btrfs_reserve_flush_enum flush); > int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv, > struct btrfs_block_rsv *dst_rsv, > u64 num_bytes); > diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c > index eb768c4..2e2eddb 100644 > --- a/fs/btrfs/delayed-inode.c > +++ b/fs/btrfs/delayed-inode.c > @@ -651,7 +651,8 @@ static int btrfs_delayed_inode_reserve_metadata( > */ > if (!src_rsv || (!trans->bytes_reserved && > src_rsv->type != BTRFS_BLOCK_RSV_DELALLOC)) { > - ret = btrfs_block_rsv_add_noflush(root, dst_rsv, num_bytes); > + ret = btrfs_block_rsv_add(root, dst_rsv, num_bytes, > + BTRFS_RESERVE_NO_FLUSH); > /* > * Since we're under a transaction reserve_metadata_bytes could > * try to commit the transaction which will make it return > @@ -686,7 +687,8 @@ static int btrfs_delayed_inode_reserve_metadata( > * reserve something strictly for us. If not be a pain and try > * to steal from the delalloc block rsv. > */ > - ret = btrfs_block_rsv_add_noflush(root, dst_rsv, num_bytes); > + ret = btrfs_block_rsv_add(root, dst_rsv, num_bytes, > + BTRFS_RESERVE_NO_FLUSH); > if (!ret) > goto out; > > diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c > index 8a01087..73b0255 100644 > --- a/fs/btrfs/extent-tree.c > +++ b/fs/btrfs/extent-tree.c > @@ -3851,24 +3851,31 @@ static int flush_space(struct btrfs_root *root, > */ > static int reserve_metadata_bytes(struct btrfs_root *root, > struct btrfs_block_rsv *block_rsv, > - u64 orig_bytes, int flush) > + u64 orig_bytes, > + enum btrfs_reserve_flush_enum flush) > { > struct btrfs_space_info *space_info = block_rsv->space_info; > u64 used; > u64 num_bytes = orig_bytes; > - int flush_state = FLUSH_DELALLOC; > + int flush_state; > int ret = 0; > bool flushing = false; > bool committed = false; > > + if (flush == BTRFS_RESERVE_FLUSH_ALL) > + flush_state = FLUSH_DELALLOC; > + else > + flush_state = FLUSH_DELAYED_ITEMS_NR; > + > again: > ret = 0; > spin_lock(&space_info->lock); > /* > - * We only want to wait if somebody other than us is flushing and we are > - * actually alloed to flush. > + * We only want to wait if somebody other than us is flushing and we > + * are actually alloed to flush all things. > */ > - while (flush && !flushing && space_info->flush) { > + while (flush == BTRFS_RESERVE_FLUSH_ALL && !flushing && > + space_info->flush) { > spin_unlock(&space_info->lock); > /* > * If we have a trans handle we can't wait because the flusher > @@ -3932,7 +3939,8 @@ again: > */ > avail = (space_info->total_bytes - space_info->bytes_used) * 8; > do_div(avail, 10); > - if (space_info->bytes_pinned >= avail && flush && !committed) { > + if (space_info->bytes_pinned >= avail && > + flush != BTRFS_RESERVE_NO_FLUSH && !committed) { > space_info->flush = 1; > flushing = true; > spin_unlock(&space_info->lock); > @@ -3957,11 +3965,11 @@ again: > avail >>= 1; > > /* > - * If we aren't flushing don't let us overcommit too much, say > - * 1/8th of the space. If we can flush, let it overcommit up to > - * 1/2 of the space. > + * If we aren't flushing all things, don't let us overcommit > + * too much, say 1/8th of the space. If we can flush all > + * things, let it overcommit up to 1/2 of the space. > */ > - if (flush) > + if (flush == BTRFS_RESERVE_FLUSH_ALL) > avail >>= 1; > else > avail >>= 3; > @@ -3980,14 +3988,14 @@ again: > * to reclaim space we can actually use it instead of somebody else > * stealing it from us. > */ > - if (ret && flush) { > + if (ret && flush != BTRFS_RESERVE_NO_FLUSH) { > flushing = true; > space_info->flush = 1; > } > > spin_unlock(&space_info->lock); > > - if (!ret || !flush) > + if (!ret || flush == BTRFS_RESERVE_NO_FLUSH) > goto out; > > ret = flush_space(root, space_info, num_bytes, orig_bytes, > @@ -4144,9 +4152,9 @@ void btrfs_free_block_rsv(struct btrfs_root *root, > kfree(rsv); > } > > -static inline int __block_rsv_add(struct btrfs_root *root, > - struct btrfs_block_rsv *block_rsv, > - u64 num_bytes, int flush) > +int btrfs_block_rsv_add(struct btrfs_root *root, > + struct btrfs_block_rsv *block_rsv, u64 num_bytes, > + enum btrfs_reserve_flush_enum flush) > { > int ret; > > @@ -4162,20 +4170,6 @@ static inline int __block_rsv_add(struct btrfs_root > *root, > return ret; > } > > -int btrfs_block_rsv_add(struct btrfs_root *root, > - struct btrfs_block_rsv *block_rsv, > - u64 num_bytes) > -{ > - return __block_rsv_add(root, block_rsv, num_bytes, 1); > -} > - > -int btrfs_block_rsv_add_noflush(struct btrfs_root *root, > - struct btrfs_block_rsv *block_rsv, > - u64 num_bytes) > -{ > - return __block_rsv_add(root, block_rsv, num_bytes, 0); > -} > - > int btrfs_block_rsv_check(struct btrfs_root *root, > struct btrfs_block_rsv *block_rsv, int min_factor) > { > @@ -4194,9 +4188,9 @@ int btrfs_block_rsv_check(struct btrfs_root *root, > return ret; > } > > -static inline int __btrfs_block_rsv_refill(struct btrfs_root *root, > - struct btrfs_block_rsv *block_rsv, > - u64 min_reserved, int flush) > +int btrfs_block_rsv_refill(struct btrfs_root *root, > + struct btrfs_block_rsv *block_rsv, u64 min_reserved, > + enum btrfs_reserve_flush_enum flush) > { > u64 num_bytes = 0; > int ret = -ENOSPC; > @@ -4224,20 +4218,6 @@ static inline int __btrfs_block_rsv_refill(struct > btrfs_root *root, > return ret; > } > > -int btrfs_block_rsv_refill(struct btrfs_root *root, > - struct btrfs_block_rsv *block_rsv, > - u64 min_reserved) > -{ > - return __btrfs_block_rsv_refill(root, block_rsv, min_reserved, 1); > -} > - > -int btrfs_block_rsv_refill_noflush(struct btrfs_root *root, > - struct btrfs_block_rsv *block_rsv, > - u64 min_reserved) > -{ > - return __btrfs_block_rsv_refill(root, block_rsv, min_reserved, 0); > -} > - > int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv, > struct btrfs_block_rsv *dst_rsv, > u64 num_bytes) > @@ -4528,14 +4508,15 @@ int btrfs_delalloc_reserve_metadata(struct inode > *inode, u64 num_bytes) > u64 csum_bytes; > unsigned nr_extents = 0; > int extra_reserve = 0; > - int flush = 1; > + enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL; > int ret; > > /* Need to be holding the i_mutex here if we aren't free space cache */ > if (btrfs_is_free_space_inode(inode)) > - flush = 0; > + flush = BTRFS_RESERVE_NO_FLUSH; > > - if (flush && btrfs_transaction_in_commit(root->fs_info)) > + if (flush != BTRFS_RESERVE_NO_FLUSH && > + btrfs_transaction_in_commit(root->fs_info)) > schedule_timeout(1); > > mutex_lock(&BTRFS_I(inode)->delalloc_mutex); > @@ -6297,7 +6278,8 @@ use_block_rsv(struct btrfs_trans_handle *trans, > block_rsv = get_block_rsv(trans, root); > > if (block_rsv->size == 0) { > - ret = reserve_metadata_bytes(root, block_rsv, blocksize, 0); > + ret = reserve_metadata_bytes(root, block_rsv, blocksize, > + BTRFS_RESERVE_NO_FLUSH); > /* > * If we couldn't reserve metadata bytes try and use some from > * the global reserve. > @@ -6324,7 +6306,8 @@ use_block_rsv(struct btrfs_trans_handle *trans, > printk(KERN_DEBUG "btrfs: block rsv returned %d\n", > ret); > WARN_ON(1); > } > - ret = reserve_metadata_bytes(root, block_rsv, blocksize, 0); > + ret = reserve_metadata_bytes(root, block_rsv, blocksize, > + BTRFS_RESERVE_NO_FLUSH); > if (!ret) { > return block_rsv; > } else if (ret && block_rsv != global_rsv) { > diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c > index b1a1c92..d26f67a 100644 > --- a/fs/btrfs/inode-map.c > +++ b/fs/btrfs/inode-map.c > @@ -434,8 +434,9 @@ int btrfs_save_ino_cache(struct btrfs_root *root, > * 3 items for pre-allocation > */ > trans->bytes_reserved = btrfs_calc_trans_metadata_size(root, 8); > - ret = btrfs_block_rsv_add_noflush(root, trans->block_rsv, > - trans->bytes_reserved); > + ret = btrfs_block_rsv_add(root, trans->block_rsv, > + trans->bytes_reserved, > + BTRFS_RESERVE_NO_FLUSH); > if (ret) > goto out; > trace_btrfs_space_reservation(root->fs_info, "ino_cache", > diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c > index 2135899..96ee384 100644 > --- a/fs/btrfs/inode.c > +++ b/fs/btrfs/inode.c > @@ -3785,7 +3785,8 @@ void btrfs_evict_inode(struct inode *inode) > * inode item when doing the truncate. > */ > while (1) { > - ret = btrfs_block_rsv_refill_noflush(root, rsv, min_size); > + ret = btrfs_block_rsv_refill(root, rsv, min_size, > + BTRFS_RESERVE_FLUSH_LIMIT); > > /* > * Try and steal from the global reserve since we will > @@ -3803,7 +3804,7 @@ void btrfs_evict_inode(struct inode *inode) > goto no_delete; > } > > - trans = btrfs_start_transaction_noflush(root, 1); > + trans = btrfs_start_transaction_lflush(root, 1); > if (IS_ERR(trans)) { > btrfs_orphan_del(NULL, inode); > btrfs_free_block_rsv(root, rsv); > @@ -6833,7 +6834,8 @@ static int btrfs_truncate(struct inode *inode) > btrfs_add_ordered_operation(trans, root, inode); > > while (1) { > - ret = btrfs_block_rsv_refill(root, rsv, min_size); > + ret = btrfs_block_rsv_refill(root, rsv, min_size, > + BTRFS_RESERVE_FLUSH_ALL); > if (ret) { > /* > * This can only happen with the original transaction we > diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c > index f193096..d3d651e 100644 > --- a/fs/btrfs/relocation.c > +++ b/fs/btrfs/relocation.c > @@ -2074,7 +2074,8 @@ static noinline_for_stack int merge_reloc_root(struct > reloc_control *rc, > BUG_ON(IS_ERR(trans)); > trans->block_rsv = rc->block_rsv; > > - ret = btrfs_block_rsv_refill(root, rc->block_rsv, min_reserved); > + ret = btrfs_block_rsv_refill(root, rc->block_rsv, min_reserved, > + BTRFS_RESERVE_FLUSH_ALL); > if (ret) { > BUG_ON(ret != -EAGAIN); > ret = btrfs_commit_transaction(trans, root); > @@ -2184,7 +2185,8 @@ int prepare_to_merge(struct reloc_control *rc, int err) > again: > if (!err) { > num_bytes = rc->merging_rsv_size; > - ret = btrfs_block_rsv_add(root, rc->block_rsv, num_bytes); > + ret = btrfs_block_rsv_add(root, rc->block_rsv, num_bytes, > + BTRFS_RESERVE_FLUSH_ALL); > if (ret) > err = ret; > } > @@ -2459,7 +2461,8 @@ static int reserve_metadata_space(struct > btrfs_trans_handle *trans, > num_bytes = calcu_metadata_size(rc, node, 1) * 2; > > trans->block_rsv = rc->block_rsv; > - ret = btrfs_block_rsv_add(root, rc->block_rsv, num_bytes); > + ret = btrfs_block_rsv_add(root, rc->block_rsv, num_bytes, > + BTRFS_RESERVE_FLUSH_ALL); > if (ret) { > if (ret == -EAGAIN) > rc->commit_transaction = 1; > @@ -3685,7 +3688,8 @@ int prepare_to_relocate(struct reloc_control *rc) > * is no reservation in transaction handle. > */ > ret = btrfs_block_rsv_add(rc->extent_root, rc->block_rsv, > - rc->extent_root->nodesize * 256); > + rc->extent_root->nodesize * 256, > + BTRFS_RESERVE_FLUSH_ALL); > if (ret) > return ret; > > diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c > index 7d3fc93..23b56a5 100644 > --- a/fs/btrfs/transaction.c > +++ b/fs/btrfs/transaction.c > @@ -289,9 +289,9 @@ static int may_wait_transaction(struct btrfs_root *root, > int type) > return 0; > } > > -static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, > - u64 num_items, int type, > - int noflush) > +static struct btrfs_trans_handle * > +start_transaction(struct btrfs_root *root, u64 num_items, int type, > + enum btrfs_reserve_flush_enum flush) > { > struct btrfs_trans_handle *h; > struct btrfs_transaction *cur_trans; > @@ -325,14 +325,9 @@ static struct btrfs_trans_handle > *start_transaction(struct btrfs_root *root, > } > > num_bytes = btrfs_calc_trans_metadata_size(root, num_items); > - if (noflush) > - ret = btrfs_block_rsv_add_noflush(root, > - &root->fs_info->trans_block_rsv, > - num_bytes); > - else > - ret = btrfs_block_rsv_add(root, > - &root->fs_info->trans_block_rsv, > - num_bytes); > + ret = btrfs_block_rsv_add(root, > + &root->fs_info->trans_block_rsv, > + num_bytes, flush); > if (ret) > return ERR_PTR(ret); > } > @@ -396,13 +391,15 @@ got_it: > struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, > int num_items) > { > - return start_transaction(root, num_items, TRANS_START, 0); > + return start_transaction(root, num_items, TRANS_START, > + BTRFS_RESERVE_FLUSH_ALL); > } > > -struct btrfs_trans_handle *btrfs_start_transaction_noflush( > +struct btrfs_trans_handle *btrfs_start_transaction_lflush( > struct btrfs_root *root, int num_items) > { > - return start_transaction(root, num_items, TRANS_START, 1); > + return start_transaction(root, num_items, TRANS_START, > + BTRFS_RESERVE_FLUSH_LIMIT); > } > > struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root) > @@ -998,8 +995,9 @@ static noinline int create_pending_snapshot(struct > btrfs_trans_handle *trans, > btrfs_reloc_pre_snapshot(trans, pending, &to_reserve); > > if (to_reserve > 0) { > - ret = btrfs_block_rsv_add_noflush(root, &pending->block_rsv, > - to_reserve); > + ret = btrfs_block_rsv_add(root, &pending->block_rsv, > + to_reserve, > + BTRFS_RESERVE_NO_FLUSH); > if (ret) { > pending->error = ret; > goto no_free_objectid; > diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h > index 06c4929..a71d3e2 100644 > --- a/fs/btrfs/transaction.h > +++ b/fs/btrfs/transaction.h > @@ -96,7 +96,7 @@ int btrfs_end_transaction_nolock(struct btrfs_trans_handle > *trans, > struct btrfs_root *root); > struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, > int num_items); > -struct btrfs_trans_handle *btrfs_start_transaction_noflush( > +struct btrfs_trans_handle *btrfs_start_transaction_lflush( > struct btrfs_root *root, int num_items); > struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root); > struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root > *root); > -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html