On Mon, Aug 5, 2019 at 3:48 PM Nikolay Borisov <nbori...@suse.com> wrote: > > run_delalloc_nocow contains numerous, somewhat subtle, checks when > figuring out whether a particular extent should be CoW'ed or not. This > patch explicitly states the assumptions those checks verify. As a > result also document 2 of the more subtle checks in check_committed_ref > as well. > > Signed-off-by: Nikolay Borisov <nbori...@suse.com> > --- > fs/btrfs/extent-tree.c | 3 +++ > fs/btrfs/inode.c | 50 ++++++++++++++++++++++++++++++++++++++---- > 2 files changed, 49 insertions(+), 4 deletions(-) > > diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c > index 163db9a560e2..1fc795fda4c2 100644 > --- a/fs/btrfs/extent-tree.c > +++ b/fs/btrfs/extent-tree.c > @@ -2868,16 +2868,19 @@ static noinline int check_committed_ref(struct > btrfs_root *root, > item_size = btrfs_item_size_nr(leaf, path->slots[0]); > ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); > > + /* If extent item has more than 1 inline ref then it has references */ > if (item_size != sizeof(*ei) + > btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY)) > goto out; > > + /* If extent created before last snapshot => it's definitely > referenced */
Extents are always referenced, otherwise we have a corruption, so: referenced -> shared > if (btrfs_extent_generation(leaf, ei) <= > btrfs_root_last_snapshot(&root->root_item)) > goto out; > > iref = (struct btrfs_extent_inline_ref *)(ei + 1); > > + /* If this extent has SHARED_DATA_REF then it's referenced */ referenced -> shared > type = btrfs_get_extent_inline_ref_type(leaf, iref, > BTRFS_REF_TYPE_DATA); > if (type != BTRFS_EXTENT_DATA_REF_KEY) > goto out; > diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c > index 4393f986e5ad..aa5e017e31ab 100644 > --- a/fs/btrfs/inode.c > +++ b/fs/btrfs/inode.c > @@ -1345,6 +1345,11 @@ static noinline int run_delalloc_nocow(struct inode > *inode, > cur_offset, 0); > if (ret < 0) > goto error; > + > + /* > + * If nothing found and this is the initial search get the > last > + * extent of this file Last extent? No, we want to check if the previous slot points to an extent item, and if so, use it. That does not mean it's the last extent in the file, it can be any in the middle or in the beginning. This is hit when there's no extent that starts at the given search offset, so we end up in the next one and we have to go 1 slot behind because the previous one contains the search offset. > + */ > if (ret > 0 && path->slots[0] > 0 && check_prev) { > leaf = path->nodes[0]; > btrfs_item_key_to_cpu(leaf, &found_key, > @@ -1353,9 +1358,9 @@ static noinline int run_delalloc_nocow(struct inode > *inode, > found_key.type == BTRFS_EXTENT_DATA_KEY) > path->slots[0]--; > } > - /* Check previous only on first pass */ Previous patch added this comment, this one removes it. Why? > check_prev = false; > next_slot: > + /* If we are beyond end of leaf break */ No, we don't break. We iterate to the next leaf. We break only if there isn't a next leaf (we are at the last one). > leaf = path->nodes[0]; > if (path->slots[0] >= btrfs_header_nritems(leaf)) { > ret = btrfs_next_leaf(root, path); > @@ -1371,23 +1376,39 @@ static noinline int run_delalloc_nocow(struct inode > *inode, > > btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); > > + /* Didn't find anything for our INO */ > if (found_key.objectid > ino) > break; > + /* > + * Found a different inode or no extents for our file, > + * goto next slot No. This does not mean that there are no extents for the file. If there weren't any, we would break instead of iterating to the next slot. One example described at https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=1d512cb77bdbda80f0dd0620a3b260d697fd581d > + */ > if (WARN_ON_ONCE(found_key.objectid < ino) || > found_key.type < BTRFS_EXTENT_DATA_KEY) { > path->slots[0]++; > goto next_slot; > } > + > + /* Found key is not EXTENT_DATA_KEY or starts after req range > */ > if (found_key.type > BTRFS_EXTENT_DATA_KEY || > found_key.offset > end) > break; > > + /* > + * If the found extent starts after requested offset, then > + * adjust extent_end to be right before this extent begins > + */ > if (found_key.offset > cur_offset) { > extent_end = found_key.offset; > extent_type = 0; > goto out_check; > } > > + > + /* > + * Found extent which begins before our range and has the > + * potential to intersect it. > + */ > fi = btrfs_item_ptr(leaf, path->slots[0], > struct btrfs_file_extent_item); > extent_type = btrfs_file_extent_type(leaf, fi); > @@ -1401,19 +1422,28 @@ static noinline int run_delalloc_nocow(struct inode > *inode, > btrfs_file_extent_num_bytes(leaf, fi); > disk_num_bytes = > btrfs_file_extent_disk_num_bytes(leaf, fi); > + /* > + * If extent we got ends before our range starts, > + * skip to next extent > + */ > if (extent_end <= start) { > path->slots[0]++; > goto next_slot; > } > + /* skip holes */ > if (disk_bytenr == 0) > goto out_check; > + /* skip compressed/encrypted/encoded extents */ > if (btrfs_file_extent_compression(leaf, fi) || > btrfs_file_extent_encryption(leaf, fi) || > btrfs_file_extent_other_encoding(leaf, fi)) > goto out_check; > /* > - * Do the same check as in btrfs_cross_ref_exist but > - * without the unnecessary search. > + * If extent is created before the last volume's > snapshot > + * this implies the extent is shared, hence we can't > do > + * nocow. This is the same check as in > + * btrfs_cross_ref_exist but without calling > + * btrfs_search_slot. > */ > if (!freespace_inode && > btrfs_file_extent_generation(leaf, fi) <= > @@ -1421,6 +1451,7 @@ static noinline int run_delalloc_nocow(struct inode > *inode, > goto out_check; > if (extent_type == BTRFS_FILE_EXTENT_REG && !force) > goto out_check; > + /* If extent RO, we must CoW it */ > if (btrfs_extent_readonly(fs_info, disk_bytenr)) > goto out_check; > ret = btrfs_cross_ref_exist(root, ino, > @@ -1444,7 +1475,7 @@ static noinline int run_delalloc_nocow(struct inode > *inode, > disk_bytenr += cur_offset - found_key.offset; > num_bytes = min(end + 1, extent_end) - cur_offset; > /* > - * if there are pending snapshots for this root, > + * If there are pending snapshots for this root, > * we fall into common COW way. > */ > if (!freespace_inode && > atomic_read(&root->snapshot_force_cow)) > @@ -1481,12 +1512,17 @@ static noinline int run_delalloc_nocow(struct inode > *inode, > BUG(); > } > out_check: > + /* Skip extents outside of our requested range */ > if (extent_end <= start) { > path->slots[0]++; > if (nocow) > btrfs_dec_nocow_writers(fs_info, disk_bytenr); > goto next_slot; > } > + /* > + * If nocow is false then record the beginning of the range > + * that needs to be CoWed > + */ > if (!nocow) { > if (cow_start == (u64)-1) > cow_start = cur_offset; > @@ -1498,6 +1534,12 @@ static noinline int run_delalloc_nocow(struct inode > *inode, > } > > btrfs_release_path(path); > + > + /* > + * CoW range from cow_start to found_key.offset - 1. As the > key > + * will contains the beginning of the first extent that can be > + * NOCOW, following one which needs to be CoW'ed > + */ > if (cow_start != (u64)-1) { > ret = cow_file_range(inode, locked_page, > cow_start, found_key.offset - 1, > -- > 2.17.1 > -- Filipe David Manana, “Whether you think you can, or you think you can't — you're right.”