In this patch, we do three things: a) skiplist -> rcu-skiplist. This is quite direct: since each level of a skiplist is a linked list, any modification to the skiplist boils down to pointer updates, which fits RCU's semantics.
b) use rcu lock to protect extent_map instead of rwlock. c) make extent_map reclaim after dropping the updater side lock. Signed-off-by: Liu Bo <liubo2...@cn.fujitsu.com> --- fs/btrfs/compression.c | 8 +++--- fs/btrfs/disk-io.c | 15 ++++++---- fs/btrfs/extent_io.c | 13 ++++----- fs/btrfs/extent_map.c | 39 +++++++++++++++++--------- fs/btrfs/extent_map.h | 7 +++-- fs/btrfs/file.c | 23 +++++++++++----- fs/btrfs/inode.c | 69 ++++++++++++++++++++++++++++++++--------------- fs/btrfs/ioctl.c | 8 +++--- fs/btrfs/relocation.c | 9 ++++-- fs/btrfs/scrub.c | 4 +- fs/btrfs/skiplist.c | 6 ++-- fs/btrfs/skiplist.h | 25 +++++++++++------ fs/btrfs/volumes.c | 46 ++++++++++++++++++-------------- 13 files changed, 168 insertions(+), 104 deletions(-) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 14f1c5a..bb4ac31 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -498,10 +498,10 @@ static noinline int add_ra_bio_pages(struct inode *inode, */ set_page_extent_mapped(page); lock_extent(tree, last_offset, end, GFP_NOFS); - read_lock(&em_tree->lock); + rcu_read_lock(); em = lookup_extent_mapping(em_tree, last_offset, PAGE_CACHE_SIZE); - read_unlock(&em_tree->lock); + rcu_read_unlock(); if (!em || last_offset < em->start || (last_offset + PAGE_CACHE_SIZE > extent_map_end(em)) || @@ -583,11 +583,11 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, em_tree = &BTRFS_I(inode)->extent_tree; /* we need the actual starting offset of this extent in the file */ - read_lock(&em_tree->lock); + rcu_read_lock(); em = lookup_extent_mapping(em_tree, page_offset(bio->bi_io_vec->bv_page), PAGE_CACHE_SIZE); - read_unlock(&em_tree->lock); + rcu_read_unlock(); compressed_len = em->block_len; cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 3f9d555..2dbc969 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -189,17 +189,17 @@ static struct extent_map 
*btree_get_extent(struct inode *inode, { struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; struct extent_map *em; + struct extent_map *to_free1 = NULL, *to_free2 = NULL; int ret; - read_lock(&em_tree->lock); + rcu_read_lock(); em = lookup_extent_mapping(em_tree, start, len); + rcu_read_unlock(); if (em) { em->bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; - read_unlock(&em_tree->lock); goto out; } - read_unlock(&em_tree->lock); em = alloc_extent_map(); if (!em) { @@ -212,8 +212,12 @@ static struct extent_map *btree_get_extent(struct inode *inode, em->block_start = 0; em->bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; - write_lock(&em_tree->lock); - ret = add_extent_mapping(em_tree, em); + spin_lock(&em_tree->lock); + ret = add_extent_mapping(em_tree, em, &to_free1, &to_free2); + spin_unlock(&em_tree->lock); + free_extent_map(to_free1); + free_extent_map(to_free2); + if (ret == -EEXIST) { u64 failed_start = em->start; u64 failed_len = em->len; @@ -231,7 +235,6 @@ static struct extent_map *btree_get_extent(struct inode *inode, free_extent_map(em); em = NULL; } - write_unlock(&em_tree->lock); if (ret) em = ERR_PTR(ret); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 49f3c9d..30a8270 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2013,10 +2013,10 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page, failrec->bio_flags = 0; failrec->in_validation = 0; - read_lock(&em_tree->lock); + rcu_read_lock(); em = lookup_extent_mapping(em_tree, start, failrec->len); + rcu_read_unlock(); if (!em) { - read_unlock(&em_tree->lock); kfree(failrec); return -EIO; } @@ -2025,7 +2025,6 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page, free_extent_map(em); em = NULL; } - read_unlock(&em_tree->lock); if (!em || IS_ERR(em)) { kfree(failrec); @@ -3286,15 +3285,15 @@ int try_release_extent_mapping(struct extent_map_tree *map, u64 len; while (start <= end) { len = end - 
start + 1; - write_lock(&map->lock); + spin_lock(&map->lock); em = lookup_extent_mapping(map, start, len); if (IS_ERR_OR_NULL(em)) { - write_unlock(&map->lock); + spin_unlock(&map->lock); break; } if (test_bit(EXTENT_FLAG_PINNED, &em->flags) || em->start != start) { - write_unlock(&map->lock); + spin_unlock(&map->lock); free_extent_map(em); break; } @@ -3307,7 +3306,7 @@ int try_release_extent_mapping(struct extent_map_tree *map, free_extent_map(em); } start = extent_map_end(em); - write_unlock(&map->lock); + spin_unlock(&map->lock); /* once for us */ free_extent_map(em); diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 746084c..e2e8af0 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -67,7 +67,7 @@ void extent_map_tree_init(struct extent_map_tree *tree) { tree->head.start = (-1ULL); sl_init_list(&tree->map, &tree->head.sl_node); - rwlock_init(&tree->lock); + spin_lock_init(&tree->lock); } /** @@ -100,8 +100,11 @@ struct extent_map *alloc_extent_map(void) return em; } -static inline void __free_extent_map(struct extent_map *em) +static inline void __free_extent_map(struct rcu_head *head) { + struct sl_node *node = container_of(head, struct sl_node, rcu_head); + struct extent_map *em = sl_entry(node, struct extent_map, sl_node); + #if MAP_LEAK_DEBUG unsigned long flags; @@ -129,7 +132,7 @@ void free_extent_map(struct extent_map *em) WARN_ON(atomic_read(&em->refs) == 0); if (atomic_dec_and_test(&em->refs)) - __free_extent_map(em); + call_rcu(&em->sl_node.rcu_head, __free_extent_map); } static inline int in_entry(struct sl_node *node, u64 offset) @@ -166,14 +169,14 @@ static struct sl_node *sl_search(struct sl_list *list, u64 offset, BUG_ON(!list); level = list->level; - p = list->head; + p = rcu_dereference(list->head); BUG_ON(!p); if (sl_empty(p)) return NULL; do { while (entry = next_entry(p, level, &q), entry->start <= offset) - p = q; + p = rcu_dereference(q); if (in_entry(p, offset)) return p; @@ -262,7 +265,9 @@ static int 
mergable_maps(struct extent_map *prev, struct extent_map *next) return 0; } -static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em) +static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em, + struct extent_map **to_free1, + struct extent_map **to_free2) { struct extent_map *merge = NULL; struct sl_node *sl; @@ -278,7 +283,8 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em) em->block_start = merge->block_start; merge->in_tree = 0; sl_erase(&merge->sl_node, &tree->map); - free_extent_map(merge); + if (merge) + *to_free1 = merge; } } @@ -290,7 +296,8 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em) em->block_len += merge->len; merge->in_tree = 0; sl_erase(&merge->sl_node, &tree->map); - free_extent_map(merge); + if (merge) + *to_free2 = merge; } } @@ -298,8 +305,9 @@ int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len) { int ret = 0; struct extent_map *em; + struct extent_map *to_free1 = NULL, *to_free2 = NULL; - write_lock(&tree->lock); + spin_lock(&tree->lock); em = lookup_extent_mapping(tree, start, len); WARN_ON(!em || em->start != start); @@ -308,11 +316,13 @@ int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len) clear_bit(EXTENT_FLAG_PINNED, &em->flags); - try_merge_map(tree, em); + try_merge_map(tree, em, &to_free1, &to_free2); free_extent_map(em); out: - write_unlock(&tree->lock); + spin_unlock(&tree->lock); + free_extent_map(to_free1); + free_extent_map(to_free2); return ret; } @@ -326,8 +336,9 @@ out: * into the tree directly, with an additional reference taken, or a * reference dropped if the merge attempt was successful. 
*/ -int add_extent_mapping(struct extent_map_tree *tree, - struct extent_map *em) +int add_extent_mapping(struct extent_map_tree *tree, struct extent_map *em, + struct extent_map **to_free1, + struct extent_map **to_free2) { int ret = 0; struct sl_node *sl_node; @@ -340,7 +351,7 @@ int add_extent_mapping(struct extent_map_tree *tree, atomic_inc(&em->refs); - try_merge_map(tree, em); + try_merge_map(tree, em, to_free1, to_free2); out: return ret; } diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index 6d2c247..c61a105 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -41,7 +41,7 @@ struct map_head { struct extent_map_tree { struct sl_list map; - rwlock_t lock; + spinlock_t lock; struct map_head head; }; @@ -62,8 +62,9 @@ static inline u64 extent_map_block_end(struct extent_map *em) void extent_map_tree_init(struct extent_map_tree *tree); struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, u64 start, u64 len); -int add_extent_mapping(struct extent_map_tree *tree, - struct extent_map *em); +int add_extent_mapping(struct extent_map_tree *tree, struct extent_map *em, + struct extent_map **to_free1, + struct extent_map **to_free2); int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em); struct extent_map *alloc_extent_map(void); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index cc7492c..8284202 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -435,10 +435,12 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, struct extent_map *em; struct extent_map *split = NULL; struct extent_map *split2 = NULL; + struct extent_map *to_free[4]; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; u64 len = end - start + 1; int ret; int testend = 1; + int i; unsigned long flags; int compressed = 0; @@ -454,24 +456,27 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, split2 = alloc_extent_map(); BUG_ON(!split || !split2); - write_lock(&em_tree->lock); + for 
(i = 0; i < 4; i++) + to_free[i] = NULL; + spin_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, start, len); if (!em) { - write_unlock(&em_tree->lock); + spin_unlock(&em_tree->lock); break; } + flags = em->flags; if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) { if (testend && em->start + em->len >= start + len) { free_extent_map(em); - write_unlock(&em_tree->lock); + spin_unlock(&em_tree->lock); break; } start = em->start + em->len; if (testend) len = start + len - (em->start + em->len); free_extent_map(em); - write_unlock(&em_tree->lock); + spin_unlock(&em_tree->lock); continue; } compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags); @@ -493,7 +498,8 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, split->bdev = em->bdev; split->flags = flags; split->compress_type = em->compress_type; - ret = add_extent_mapping(em_tree, split); + ret = add_extent_mapping(em_tree, split, &to_free[0], + &to_free[1]); BUG_ON(ret); free_extent_map(split); split = split2; @@ -519,12 +525,15 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, split->orig_start = split->start; } - ret = add_extent_mapping(em_tree, split); + ret = add_extent_mapping(em_tree, split, &to_free[2], + &to_free[3]); BUG_ON(ret); free_extent_map(split); split = NULL; } - write_unlock(&em_tree->lock); + spin_unlock(&em_tree->lock); + for (i = 0; i < 4; i++) + free_extent_map(to_free[i]); /* once for us */ free_extent_map(em); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 13b0542..d896b39 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -573,6 +573,7 @@ static noinline int submit_compressed_extents(struct inode *inode, struct btrfs_trans_handle *trans; struct btrfs_key ins; struct extent_map *em; + struct extent_map *to_free1 = NULL, *to_free2 = NULL; struct btrfs_root *root = BTRFS_I(inode)->root; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; struct extent_io_tree *io_tree; @@ -675,9 +676,12 @@ retry: 
set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); while (1) { - write_lock(&em_tree->lock); - ret = add_extent_mapping(em_tree, em); - write_unlock(&em_tree->lock); + spin_lock(&em_tree->lock); + ret = add_extent_mapping(em_tree, em, &to_free1, + &to_free2); + spin_unlock(&em_tree->lock); + free_extent_map(to_free1); + free_extent_map(to_free2); if (ret != -EEXIST) { free_extent_map(em); break; @@ -732,8 +736,9 @@ static u64 get_extent_allocation_hint(struct inode *inode, u64 start, struct extent_map *em; u64 alloc_hint = 0; - read_lock(&em_tree->lock); + rcu_read_lock(); em = search_extent_mapping(em_tree, start, num_bytes); + rcu_read_unlock(); if (em) { /* * if block start isn't an actual block number then find the @@ -752,7 +757,6 @@ static u64 get_extent_allocation_hint(struct inode *inode, u64 start, free_extent_map(em); } } - read_unlock(&em_tree->lock); return alloc_hint; } @@ -786,6 +790,7 @@ static noinline int cow_file_range(struct inode *inode, u64 blocksize = root->sectorsize; struct btrfs_key ins; struct extent_map *em; + struct extent_map *to_free1 = NULL, *to_free2 = NULL; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; int ret = 0; @@ -854,9 +859,12 @@ static noinline int cow_file_range(struct inode *inode, set_bit(EXTENT_FLAG_PINNED, &em->flags); while (1) { - write_lock(&em_tree->lock); - ret = add_extent_mapping(em_tree, em); - write_unlock(&em_tree->lock); + spin_lock(&em_tree->lock); + ret = add_extent_mapping(em_tree, em, &to_free1, + &to_free2); + spin_unlock(&em_tree->lock); + free_extent_map(to_free1); + free_extent_map(to_free2); if (ret != -EEXIST) { free_extent_map(em); break; @@ -1195,6 +1203,7 @@ out_check: if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) { struct extent_map *em; + struct extent_map *to_free1 = NULL, *to_free2 = NULL; struct extent_map_tree *em_tree; em_tree = &BTRFS_I(inode)->extent_tree; em = alloc_extent_map(); @@ -1207,9 +1216,12 @@ out_check: em->bdev = root->fs_info->fs_devices->latest_bdev; 
set_bit(EXTENT_FLAG_PINNED, &em->flags); while (1) { - write_lock(&em_tree->lock); - ret = add_extent_mapping(em_tree, em); - write_unlock(&em_tree->lock); + spin_lock(&em_tree->lock); + ret = add_extent_mapping(em_tree, em, &to_free1, + &to_free2); + spin_unlock(&em_tree->lock); + free_extent_map(to_free1); + free_extent_map(to_free2); if (ret != -EEXIST) { free_extent_map(em); break; @@ -4862,7 +4874,9 @@ out_fail: static int merge_extent_mapping(struct extent_map_tree *em_tree, struct extent_map *existing, struct extent_map *em, - u64 map_start, u64 map_len) + u64 map_start, u64 map_len, + struct extent_map **to_free1, + struct extent_map **to_free2) { u64 start_diff; @@ -4875,7 +4889,7 @@ static int merge_extent_mapping(struct extent_map_tree *em_tree, em->block_start += start_diff; em->block_len -= start_diff; } - return add_extent_mapping(em_tree, em); + return add_extent_mapping(em_tree, em, to_free1, to_free2); } static noinline int uncompress_inline(struct btrfs_path *path, @@ -4944,17 +4958,19 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, struct extent_buffer *leaf; struct btrfs_key found_key; struct extent_map *em = NULL; + struct extent_map *to_free[4]; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct btrfs_trans_handle *trans = NULL; int compress_type; + int i; again: - read_lock(&em_tree->lock); + rcu_read_lock(); em = lookup_extent_mapping(em_tree, start, len); if (em) em->bdev = root->fs_info->fs_devices->latest_bdev; - read_unlock(&em_tree->lock); + rcu_read_unlock(); if (em) { if (em->start > start || em->start + em->len <= start) @@ -5166,8 +5182,10 @@ insert: } err = 0; - write_lock(&em_tree->lock); - ret = add_extent_mapping(em_tree, em); + for (i = 0; i < 4; i++) + to_free[i] = NULL; + spin_lock(&em_tree->lock); + ret = add_extent_mapping(em_tree, em, &to_free[0], &to_free[1]); /* it is possible that someone inserted the extent 
into the tree * while we had the lock dropped. It is also possible that * an overlapping map exists in the tree @@ -5189,7 +5207,9 @@ insert: if (existing) { err = merge_extent_mapping(em_tree, existing, em, start, - root->sectorsize); + root->sectorsize, + &to_free[2], + &to_free[3]); free_extent_map(existing); if (err) { free_extent_map(em); @@ -5206,7 +5226,9 @@ insert: err = 0; } } - write_unlock(&em_tree->lock); + spin_unlock(&em_tree->lock); + for (i = 0; i < 4; i++) + free_extent_map(to_free[i]); out: trace_btrfs_get_extent(root, em); @@ -5414,9 +5436,12 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode, set_bit(EXTENT_FLAG_PINNED, &em->flags); while (insert) { - write_lock(&em_tree->lock); - ret = add_extent_mapping(em_tree, em); - write_unlock(&em_tree->lock); + struct extent_map *to_free1 = NULL, *to_free2 = NULL; + spin_lock(&em_tree->lock); + ret = add_extent_mapping(em_tree, em, &to_free1, &to_free2); + spin_unlock(&em_tree->lock); + free_extent_map(to_free1); + free_extent_map(to_free2); if (ret != -EEXIST) break; btrfs_drop_extent_cache(inode, start, start + em->len - 1, 0); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index c04f02c..83fc601 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -673,9 +673,9 @@ static int check_defrag_in_cache(struct inode *inode, u64 offset, int thresh) struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; u64 end; - read_lock(&em_tree->lock); + rcu_read_lock(); em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE); - read_unlock(&em_tree->lock); + rcu_read_unlock(); if (em) { end = extent_map_end(em); @@ -782,9 +782,9 @@ static int should_defrag_range(struct inode *inode, u64 start, u64 len, * hopefully we have this extent in the tree already, try without * the full extent lock */ - read_lock(&em_tree->lock); + rcu_read_lock(); em = lookup_extent_mapping(em_tree, start, len); - read_unlock(&em_tree->lock); + rcu_read_unlock(); if (!em) { /* get the big lock and read 
metadata off disk */ diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index cfb5543..b92d207 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -2884,6 +2884,7 @@ int setup_extent_mapping(struct inode *inode, u64 start, u64 end, struct btrfs_root *root = BTRFS_I(inode)->root; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; struct extent_map *em; + struct extent_map *to_free1 = NULL, *to_free2 = NULL; int ret = 0; em = alloc_extent_map(); @@ -2899,9 +2900,11 @@ int setup_extent_mapping(struct inode *inode, u64 start, u64 end, lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); while (1) { - write_lock(&em_tree->lock); - ret = add_extent_mapping(em_tree, em); - write_unlock(&em_tree->lock); + spin_lock(&em_tree->lock); + ret = add_extent_mapping(em_tree, em, &to_free1, &to_free2); + spin_unlock(&em_tree->lock); + free_extent_map(to_free1); + free_extent_map(to_free2); if (ret != -EEXIST) { free_extent_map(em); break; diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index ddf2c90..5aec748 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -1374,9 +1374,9 @@ static noinline_for_stack int scrub_chunk(struct scrub_dev *sdev, int i; int ret = -EINVAL; - read_lock(&map_tree->map_tree.lock); + rcu_read_lock(); em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1); - read_unlock(&map_tree->map_tree.lock); + rcu_read_unlock(); if (!em) return -EINVAL; diff --git a/fs/btrfs/skiplist.c b/fs/btrfs/skiplist.c index c803478..1069922 100644 --- a/fs/btrfs/skiplist.c +++ b/fs/btrfs/skiplist.c @@ -62,7 +62,7 @@ inline void sl_link_node(struct sl_node *node, struct sl_node **backlook, node->next[i] = q; node->prev[i] = p; - p->next[i] = node; + rcu_assign_pointer(p->next[i], node); q->prev[i] = node; i++; @@ -78,11 +78,11 @@ void sl_erase(struct sl_node *node, struct sl_list *list) level = node->level; - for (i = 0; i <= level; i++) { + for (i = level; i >= 0; i--) { prev = node->prev[i]; next = node->next[i]; - 
prev->next[i] = next; + rcu_assign_pointer(prev->next[i], next); next->prev[i] = prev; node->next[i] = node; node->prev[i] = node; diff --git a/fs/btrfs/skiplist.h b/fs/btrfs/skiplist.h index 3e414b5..2ae997d 100644 --- a/fs/btrfs/skiplist.h +++ b/fs/btrfs/skiplist.h @@ -102,41 +102,48 @@ struct sl_node *sl_insert_node(struct sl_list *list, u64 offset, #define _SKIPLIST_H #include <linux/random.h> +#include <linux/rcupdate.h> #define MAXLEVEL 16 /* double p = 0.25; */ struct sl_node { - struct sl_node **next; - struct sl_node **prev; + struct sl_node __rcu **next; + struct sl_node __rcu **prev; + struct rcu_head rcu_head; unsigned int level; unsigned int head:1; }; struct sl_list { - struct sl_node *head; - struct sl_node *h_next[MAXLEVEL]; - struct sl_node *h_prev[MAXLEVEL]; + struct sl_node __rcu *head; + struct sl_node __rcu *h_next[MAXLEVEL]; + struct sl_node __rcu *h_prev[MAXLEVEL]; unsigned int level; }; -#define sl_entry(ptr, type, member) container_of(ptr, type, member) +#define sl_entry(ptr, type, member) \ + ({ \ + typeof(*ptr) __rcu *__ptr = (typeof(*ptr) __rcu __force *)ptr; \ + container_of((typeof(ptr))rcu_dereference(__ptr), \ + type, member); \ + }) static inline int sl_empty(const struct sl_node *head) { - return head->next[0] == head; + return (rcu_dereference(head->next[0]) == head); } static inline struct sl_node *__sl_next_with_level(struct sl_node *node, int level) { - return node->next[level]; + return rcu_dereference(node->next[level]); } static inline struct sl_node *__sl_prev_with_level(struct sl_node *node, int level) { - return node->prev[level]; + return rcu_dereference(node->prev[level]); } static inline struct sl_node *sl_next(struct sl_node *node) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index adaac9e..c41502d 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1955,9 +1955,9 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, * step two, delete the device extents and the * chunk tree entries */ - 
read_lock(&em_tree->lock); + rcu_read_lock(); em = lookup_extent_mapping(em_tree, chunk_offset, 1); - read_unlock(&em_tree->lock); + rcu_read_unlock(); BUG_ON(em->start > chunk_offset || em->start + em->len < chunk_offset); @@ -1988,9 +1988,9 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, ret = btrfs_remove_block_group(trans, extent_root, chunk_offset); BUG_ON(ret); - write_lock(&em_tree->lock); + spin_lock(&em_tree->lock); remove_extent_mapping(em_tree, em); - write_unlock(&em_tree->lock); + spin_unlock(&em_tree->lock); kfree(map); em->bdev = NULL; @@ -2378,6 +2378,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, struct map_lookup *map = NULL; struct extent_map_tree *em_tree; struct extent_map *em; + struct extent_map *to_free1 = NULL, *to_free2 = NULL; struct btrfs_device_info *devices_info = NULL; u64 total_avail; int num_stripes; /* total number of stripes to allocate */ @@ -2589,9 +2590,11 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, em->block_len = em->len; em_tree = &extent_root->fs_info->mapping_tree.map_tree; - write_lock(&em_tree->lock); - ret = add_extent_mapping(em_tree, em); - write_unlock(&em_tree->lock); + spin_lock(&em_tree->lock); + ret = add_extent_mapping(em_tree, em, &to_free1, &to_free2); + spin_unlock(&em_tree->lock); + free_extent_map(to_free1); + free_extent_map(to_free2); BUG_ON(ret); free_extent_map(em); @@ -2800,9 +2803,9 @@ int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset) int readonly = 0; int i; - read_lock(&map_tree->map_tree.lock); + rcu_read_lock(); em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1); - read_unlock(&map_tree->map_tree.lock); + rcu_read_unlock(); if (!em) return 1; @@ -2854,9 +2857,9 @@ int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len) struct extent_map_tree *em_tree = &map_tree->map_tree; int ret; - read_lock(&em_tree->lock); + rcu_read_lock(); em = lookup_extent_mapping(em_tree, logical, len); - 
read_unlock(&em_tree->lock); + rcu_read_unlock(); BUG_ON(!em); BUG_ON(em->start > logical || em->start + em->len < logical); @@ -2921,9 +2924,9 @@ again: atomic_set(&bbio->error, 0); } - read_lock(&em_tree->lock); + rcu_read_lock(); em = lookup_extent_mapping(em_tree, logical, *length); - read_unlock(&em_tree->lock); + rcu_read_unlock(); if (!em) { printk(KERN_CRIT "unable to find logical %llu len %llu\n", @@ -3187,9 +3190,9 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, u64 stripe_nr; int i, j, nr = 0; - read_lock(&em_tree->lock); + rcu_read_lock(); em = lookup_extent_mapping(em_tree, chunk_start, 1); - read_unlock(&em_tree->lock); + rcu_read_unlock(); BUG_ON(!em || em->start != chunk_start); map = (struct map_lookup *)em->bdev; @@ -3461,6 +3464,7 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree; struct map_lookup *map; struct extent_map *em; + struct extent_map *to_free1 = NULL, *to_free2 = NULL; u64 logical; u64 length; u64 devid; @@ -3472,9 +3476,9 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, logical = key->offset; length = btrfs_chunk_length(leaf, chunk); - read_lock(&map_tree->map_tree.lock); + rcu_read_lock(); em = lookup_extent_mapping(&map_tree->map_tree, logical, 1); - read_unlock(&map_tree->map_tree.lock); + rcu_read_unlock(); /* already mapped? 
*/ if (em && em->start <= logical && em->start + em->len > logical) { @@ -3533,9 +3537,11 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, map->stripes[i].dev->in_fs_metadata = 1; } - write_lock(&map_tree->map_tree.lock); - ret = add_extent_mapping(&map_tree->map_tree, em); - write_unlock(&map_tree->map_tree.lock); + spin_lock(&map_tree->map_tree.lock); + ret = add_extent_mapping(&map_tree->map_tree, em, &to_free1, &to_free2); + spin_unlock(&map_tree->map_tree.lock); + free_extent_map(to_free1); + free_extent_map(to_free2); BUG_ON(ret); free_extent_map(em); -- 1.6.5.2 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html