On Fri, Dec 22, 2017 at 09:42:17AM +0200, Nikolay Borisov wrote: > > > On 22.12.2017 00:42, Liu Bo wrote: > > We've observed that btrfs_get_extent() and merge_extent_mapping() could > > return -EEXIST in several cases, and they are caused by some racy > > condition, e.g dio read vs dio write, which makes the problem very tricky > > to reproduce. > > > > This adds extent map selftests in order to simulate those racy situations. > > > > Signed-off-by: Liu Bo <bo.li....@oracle.com> > > --- > > fs/btrfs/Makefile | 2 +- > > fs/btrfs/tests/btrfs-tests.c | 3 + > > fs/btrfs/tests/btrfs-tests.h | 1 + > > fs/btrfs/tests/extent-map-tests.c | 202 > > ++++++++++++++++++++++++++++++++++++++ > > 4 files changed, 207 insertions(+), 1 deletion(-) > > create mode 100644 fs/btrfs/tests/extent-map-tests.c > > > > diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile > > index 6fe881d..0c43736 100644 > > --- a/fs/btrfs/Makefile > > +++ b/fs/btrfs/Makefile > > @@ -19,4 +19,4 @@ btrfs-$(CONFIG_BTRFS_FS_REF_VERIFY) += ref-verify.o > > btrfs-$(CONFIG_BTRFS_FS_RUN_SANITY_TESTS) += tests/free-space-tests.o \ > > tests/extent-buffer-tests.o tests/btrfs-tests.o \ > > tests/extent-io-tests.o tests/inode-tests.o tests/qgroup-tests.o \ > > - tests/free-space-tree-tests.o > > + tests/free-space-tree-tests.o tests/extent-map-tests.o > > diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c > > index d3f2537..9786d8c 100644 > > --- a/fs/btrfs/tests/btrfs-tests.c > > +++ b/fs/btrfs/tests/btrfs-tests.c > > @@ -277,6 +277,9 @@ int btrfs_run_sanity_tests(void) > > goto out; > > } > > } > > + ret = btrfs_test_extent_map(); > > + if (ret) > > + goto out; > > out: > > btrfs_destroy_test_fs(); > > return ret; > > diff --git a/fs/btrfs/tests/btrfs-tests.h b/fs/btrfs/tests/btrfs-tests.h > > index 266f1e3..bc0615b 100644 > > --- a/fs/btrfs/tests/btrfs-tests.h > > +++ b/fs/btrfs/tests/btrfs-tests.h > > @@ -33,6 +33,7 @@ int btrfs_test_extent_io(u32 sectorsize, u32 nodesize); > > int btrfs_test_inodes(u32 sectorsize, u32 nodesize); > > int btrfs_test_qgroups(u32 sectorsize, u32 nodesize); > > int btrfs_test_free_space_tree(u32 sectorsize, u32 nodesize); > > +int btrfs_test_extent_map(void); > > struct inode *btrfs_new_test_inode(void); > > struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(u32 nodesize, u32 > > sectorsize); > > void btrfs_free_dummy_fs_info(struct btrfs_fs_info *fs_info); > > diff --git a/fs/btrfs/tests/extent-map-tests.c > > b/fs/btrfs/tests/extent-map-tests.c > > new file mode 100644 > > index 0000000..0407396 > > --- /dev/null > > +++ b/fs/btrfs/tests/extent-map-tests.c > > @@ -0,0 +1,202 @@ > > +/* > > + * Copyright (C) 2017 Oracle. All rights reserved. > > + * > > + * This program is free software; you can redistribute it and/or > > + * modify it under the terms of the GNU General Public > > + * License v2 as published by the Free Software Foundation. > > + * > > + * This program is distributed in the hope that it will be useful, > > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > + * General Public License for more details. > > + * > > + * You should have received a copy of the GNU General Public > > + * License along with this program; if not, write to the > > + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, > > + * Boston, MA 021110-1307, USA. > > + */ > > + > > +#include <linux/types.h> > > +#include "btrfs-tests.h" > > +#include "../ctree.h" > > + > > +static void free_extent_map_tree(struct extent_map_tree *em_tree) > > +{ > > + struct extent_map *em; > > + struct rb_node *node; > > + > > + while (!RB_EMPTY_ROOT(&em_tree->map)) { > > + node = rb_first(&em_tree->map); > > + em = rb_entry(node, struct extent_map, rb_node); > > + remove_extent_mapping(em_tree, em); > > + > > +#ifdef CONFIG_BTRFS_DEBUG > > + if (refcount_read(&em->refs) != 1) { > > + test_msg( > > + "Oops we have a em leak: em (start 0x%llx len > > 0x%llx block_start 0x%llx block_len 0x%llx) refs %d\n", > > + em->start, em->len, em->block_start, > > + em->block_len, refcount_read(&em->refs)); > > + > > + refcount_set(&em->refs, 1); > > + } > > +#endif > > + free_extent_map(em); > > + } > > +} > > + > > +/* > > + * Test scenario: > > + * > > + * Suppose that no extent map has been loaded into memory yet, there is a > > file > > + * extent [0, 16K), followed by another file extent [16K, 20K), two dio > > reads > > + * are entering btrfs_get_extent() concurrently, t1 is reading [8K, 16K), > > t2 is > > + * reading [0, 8K) > > + * > > + * t1 t2 > > + * btrfs_get_extent() btrfs_get_extent() > > + * -> lookup_extent_mapping() ->lookup_extent_mapping() > > + * -> add_extent_mapping(0, 16K) > > + * -> return em > > + * ->add_extent_mapping(0, 16K) > > + * -> #handle -EEXIST > > + */ > > +static void test_case_1(struct extent_map_tree *em_tree) > > +{ > > So here you are testing the "fully overlapping" case in > btrfs_add_extent_mapping. > What about another test case about merging 2 adjacent extents i.e. this block > of code: > > } else if (start >= extent_map_end(existing) || start <= existing->start) { >
They're in the following patches. > > + struct extent_map *em; > > + u64 start = 0; > > + u64 len = SZ_8K; > > + int ret; > > + > > + em = alloc_extent_map(); > > + if (!em) > > + /* Skip the test on error. */ > > + return; > > + > > + /* Add [0, 16K) */ > > + em->start = 0; > > + em->len = SZ_16K; > > + em->block_start = 0; > > + em->block_len = SZ_16K; > > + ret = add_extent_mapping(em_tree, em, 0); > > + ASSERT(ret == 0); > > + free_extent_map(em); > > + > > + /* Add [16K, 20K) following [0, 16K) */ > > + em = alloc_extent_map(); > > + if (!em) > > + goto out; > > + > > + em->start = SZ_16K; > > + em->len = SZ_4K; > > + em->block_start = SZ_32K; /* avoid merging */ > > + em->block_len = SZ_4K; > > + ret = add_extent_mapping(em_tree, em, 0); > > + ASSERT(ret == 0); > > + free_extent_map(em); > > + > > + em = alloc_extent_map(); > > + if (!em) > > + goto out; > > + > > + /* Add [0, 8K), should return [0, 16K) instead. */ > > + em->start = start; > > + em->len = len; > > + em->block_start = start; > > + em->block_len = len; > > Any particular reason you are using start/len and not the constants as in the > previous 2 extent insertion cases? Makes it a lot easier to see what's > happening > without reading the above variables? > The above comment showed it, didn't it? The following if (ret) and if (em...) also uses start and len. thanks, -liubo > > + ret = btrfs_add_extent_mapping(em_tree, &em, em->start, em->len); > > + if (ret) > > + test_msg("case1 [%llu %llu]: ret %d\n", > > + start, start + len, ret); > > + if (em && > > + (em->start != 0 || extent_map_end(em) != SZ_16K || > > + em->block_start != 0 || em->block_len != SZ_16K)) > > + test_msg("case1 [%llu %llu]: ret %d return a wrong em (start > > %llu len %llu block_start %llu block_len %llu\n", > > + start, start + len, ret, em->start, em->len, > > + em->block_start, em->block_len); > > + free_extent_map(em); > > +out: > > + /* free memory */ > > + free_extent_map_tree(em_tree); > > +} > > + > > +/* > > + * Test scenario: > > + * > > + * Reading the inline ending up with EEXIST, ie. read an inline > > + * extent and discard page cache and read it again. > > + */ > > +static void test_case_2(struct extent_map_tree *em_tree) > > +{ > > + struct extent_map *em; > > + int ret; > > + > > + em = alloc_extent_map(); > > + if (!em) > > + /* Skip the test on error. */ > > + return; > > + > > + /* Add [0, 1K) */ > > + em->start = 0; > > + em->len = SZ_1K; > > + em->block_start = EXTENT_MAP_INLINE; > > + em->block_len = (u64)-1; > > + ret = add_extent_mapping(em_tree, em, 0); > > + ASSERT(ret == 0); > > + free_extent_map(em); > > + > > + /* Add [4K, 4K) following [0, 1K) */ > > + em = alloc_extent_map(); > > + if (!em) > > + goto out; > > + > > + em->start = SZ_4K; > > + em->len = SZ_4K; > > + em->block_start = SZ_4K; > > + em->block_len = SZ_4K; > > + ret = add_extent_mapping(em_tree, em, 0); > > + ASSERT(ret == 0); > > + free_extent_map(em); > > + > > + em = alloc_extent_map(); > > + if (!em) > > + goto out; > > + > > + /* Add [0, 1K) */ > > + em->start = 0; > > + em->len = SZ_1K; > > + em->block_start = EXTENT_MAP_INLINE; > > + em->block_len = (u64)-1; > > + ret = btrfs_add_extent_mapping(em_tree, &em, em->start, em->len); > > + if (ret) > > + test_msg("case2 [0 1K]: ret %d\n", ret); > > + if (em && > > + (em->start != 0 || extent_map_end(em) != SZ_1K || > > + em->block_start != EXTENT_MAP_INLINE || em->block_len != (u64)-1)) > > + test_msg("case2 [0 1K]: ret %d return a wrong em (start %llu > > len %llu block_start %llu block_len %llu\n", > > + ret, em->start, em->len, em->block_start, > > + em->block_len); > > + free_extent_map(em); > > +out: > > + /* free memory */ > > + free_extent_map_tree(em_tree); > > +} > > + > > +int btrfs_test_extent_map() > > +{ > > + struct extent_map_tree *em_tree; > > + > > + test_msg("Running extent_map tests\n"); > > + > > + em_tree = kzalloc(sizeof(*em_tree), GFP_KERNEL); > > + if (!em_tree) > > + /* Skip the test on error. */ > > + return 0; > > + > > + extent_map_tree_init(em_tree); > > + > > + test_case_1(em_tree); > > + test_case_2(em_tree); > > + > > + kfree(em_tree); > > + return 0; > > +} > > > -- > To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in > the body of a message to majord...@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html