2008/12/29 Liu Hui <onlyfl...@gmail.com>: > Hi, > This is a patch to fix discard semantic to make Btrfs work with FTL(or > SSD?). As we know, we can improve FTL's performance by telling it > which sectors are freed by file system. But if we don't tell FTL the > information of free sectors in proper time, the transaction mechanism > of Btrfs will be destroyed and Btrfs could not roll back the previous > transaction under the power loss condition. > > There are some problems in the old implementation: > 1, In __free_extent(), the pinned down extents should not be discarded. > 2, In free_extents(), the free extents are all pinned, so they need to > be discarded in transaction committing time instead of free_extents(). > 3, The reserved extent used by log tree should be discard too. > > This patch change discard behavior as follows: > 1, In __free_extent, we only discard the unpinned extent. > 2, Delay discarding the pinned extent in btrfs_finish_extent_commit() > when committing transaction. > 3, Remove discarding from free_extents() > 4, Add discard interface into btrfs_free_reserved_extent() > 5, Discard sectors before updating the free space cache, otherwise, > FTL will destroy file system data. > > -- > Thanks & Best Regards > Liu Hui > --
Hi Liu Hui, your patch is malformed, please re-send it. > > diff --git a/extent-tree.c b/extent-tree.c > index fe0e59a..7247b16 100644 > --- a/extent-tree.c > +++ b/extent-tree.c > @@ -874,6 +874,34 @@ static void btrfs_issue_discard(struct block_device > *bdev, > blkdev_issue_discard(bdev, start >> 9, len >> 9); > #endif > } > + > +static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, > + u64 num_bytes) > +{ > + int ret; > + u64 map_length = num_bytes; > + struct btrfs_multi_bio *multi = NULL; > + > + /* Tell the block device(s) that the sectors can be discarded */ > + ret = btrfs_map_block(&root->fs_info->mapping_tree, READ, > + bytenr, &map_length, &multi, 0); > + if (!ret) { > + struct btrfs_bio_stripe *stripe = multi->stripes; > + int i; > + > + if (map_length > num_bytes) > + map_length = num_bytes; > + > + for (i = 0; i < multi->num_stripes; i++, stripe++) { > + btrfs_issue_discard(stripe->dev->bdev, > + stripe->physical, > + map_length); > + } > + kfree(multi); > + } > + > + return ret; > +} > #endif > > static int noinline free_extents(struct btrfs_trans_handle *trans, > @@ -1069,10 +1097,6 @@ search: > for (pos = cur, n = pos->next; pos != end; > pos = n, n = pos->next) { > struct pending_extent_op *tmp; > -#ifdef BIO_RW_DISCARD > - u64 map_length; > - struct btrfs_multi_bio *multi = NULL; > -#endif > tmp = list_entry(pos, struct pending_extent_op, list); > > /* > @@ -1084,28 +1108,6 @@ search: > tmp->del); > BUG_ON(ret); > > -#ifdef BIO_RW_DISCARD > - map_length = tmp->num_bytes; > - ret = btrfs_map_block(&info->mapping_tree, READ, > - tmp->bytenr, &map_length, > &multi, > - 0); > - if (!ret) { > - struct btrfs_bio_stripe *stripe; > - int i; > - > - stripe = multi->stripes; > - > - if (map_length > tmp->num_bytes) > - map_length = tmp->num_bytes; > - > - for (i = 0; i < multi->num_stripes; > - i++, stripe++) > - btrfs_issue_discard(stripe->dev->bdev, > - stripe->physical, > - map_length); > - kfree(multi); > - } > -#endif > list_del_init(&tmp->list); > unlock_extent(&info->extent_ins, tmp->bytenr, > tmp->bytenr + tmp->num_bytes - 1, > @@ -2104,8 +2106,14 @@ int btrfs_finish_extent_commit(struct > btrfs_trans_handle *trans, > EXTENT_DIRTY); > if (ret) > break; > + > +#ifdef BIO_RW_DISCARD > + ret = btrfs_discard_extent(root, start, end + 1 - start); > +#endif > + > btrfs_update_pinned_extents(root, start, end + 1 - start, 0); > clear_extent_dirty(unpin, start, end, GFP_NOFS); > + > if (need_resched()) { > mutex_unlock(&root->fs_info->pinned_mutex); > cond_resched(); > @@ -2113,7 +2121,7 @@ int btrfs_finish_extent_commit(struct > btrfs_trans_handle *trans, > } > } > mutex_unlock(&root->fs_info->pinned_mutex); > - return 0; > + return ret; > } > > static int finish_current_insert(struct btrfs_trans_handle *trans, > @@ -2458,10 +2466,6 @@ static int __free_extent(struct > btrfs_trans_handle *trans, > if (refs == 0) { > u64 super_used; > u64 root_used; > -#ifdef BIO_RW_DISCARD > - u64 map_length = num_bytes; > - struct btrfs_multi_bio *multi = NULL; > -#endif > > if (pin) { > mutex_lock(&root->fs_info->pinned_mutex); > @@ -2493,28 +2497,15 @@ static int __free_extent(struct > btrfs_trans_handle *trans, > BUG_ON(ret); > } > > - ret = update_block_group(trans, root, bytenr, num_bytes, 0, > - mark_free); > - BUG_ON(ret); > #ifdef BIO_RW_DISCARD > - /* Tell the block device(s) that the sectors can be discarded > */ > - ret = btrfs_map_block(&root->fs_info->mapping_tree, READ, > - bytenr, &map_length, &multi, 0); > - if (!ret) { > - struct btrfs_bio_stripe *stripe = multi->stripes; > - int i; > - > - if (map_length > num_bytes) > - map_length = num_bytes; > - > - for (i = 0; i < multi->num_stripes; i++, stripe++) { > - btrfs_issue_discard(stripe->dev->bdev, > - stripe->physical, > - map_length); > - } > - kfree(multi); > + if (!pin) { > + ret = btrfs_discard_extent(root, bytenr, num_bytes); > } We should check 'mark_free' instead of 'pin' here > #endif > + > + ret = update_block_group(trans, root, bytenr, num_bytes, 0, > + mark_free); > + BUG_ON(ret); > } > btrfs_free_path(path); > finish_current_insert(trans, extent_root, 0); > @@ -3112,16 +3103,23 @@ again: > int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) > { > struct btrfs_block_group_cache *cache; > + int ret = 0; > > cache = btrfs_lookup_block_group(root->fs_info, start); > if (!cache) { > printk(KERN_ERR "Unable to find block group for %Lu\n", start); > return -ENOSPC; > } > + > +#ifdef BIO_RW_DISCARD > + ret = btrfs_discard_extent(root, start, len); > +#endif > + > btrfs_add_free_space(cache, start, len); > put_block_group(cache); > update_reserved_extents(root, start, len, 0); > - return 0; > + > + return ret; > } > > int btrfs_reserve_extent(struct btrfs_trans_handle *trans, -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html