This patch does three things:

a) skiplist -> rcu-skiplist
   This is quite direct: each level of a skiplist is a list, so
   any modification to the skiplist is just a set of pointer changes,
   which fits RCU's semantics.

b) use the RCU read lock instead of the rwlock to protect extent_map
   lookups; updaters now serialize on a spinlock.

c) defer reclaiming merged extent_maps until after the updater-side
   lock has been dropped.

Signed-off-by: Liu Bo <liubo2...@cn.fujitsu.com>
---
 fs/btrfs/compression.c |    8 +++---
 fs/btrfs/disk-io.c     |   15 ++++++----
 fs/btrfs/extent_io.c   |   13 ++++-----
 fs/btrfs/extent_map.c  |   39 +++++++++++++++++---------
 fs/btrfs/extent_map.h  |    7 +++--
 fs/btrfs/file.c        |   23 +++++++++++-----
 fs/btrfs/inode.c       |   69 ++++++++++++++++++++++++++++++++---------------
 fs/btrfs/ioctl.c       |    8 +++---
 fs/btrfs/relocation.c  |    9 ++++--
 fs/btrfs/scrub.c       |    4 +-
 fs/btrfs/skiplist.c    |    6 ++--
 fs/btrfs/skiplist.h    |   25 +++++++++++------
 fs/btrfs/volumes.c     |   46 ++++++++++++++++++--------------
 13 files changed, 168 insertions(+), 104 deletions(-)

diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 14f1c5a..bb4ac31 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -498,10 +498,10 @@ static noinline int add_ra_bio_pages(struct inode *inode,
                 */
                set_page_extent_mapped(page);
                lock_extent(tree, last_offset, end, GFP_NOFS);
-               read_lock(&em_tree->lock);
+               rcu_read_lock();
                em = lookup_extent_mapping(em_tree, last_offset,
                                           PAGE_CACHE_SIZE);
-               read_unlock(&em_tree->lock);
+               rcu_read_unlock();
 
                if (!em || last_offset < em->start ||
                    (last_offset + PAGE_CACHE_SIZE > extent_map_end(em)) ||
@@ -583,11 +583,11 @@ int btrfs_submit_compressed_read(struct inode *inode, 
struct bio *bio,
        em_tree = &BTRFS_I(inode)->extent_tree;
 
        /* we need the actual starting offset of this extent in the file */
-       read_lock(&em_tree->lock);
+       rcu_read_lock();
        em = lookup_extent_mapping(em_tree,
                                   page_offset(bio->bi_io_vec->bv_page),
                                   PAGE_CACHE_SIZE);
-       read_unlock(&em_tree->lock);
+       rcu_read_unlock();
 
        compressed_len = em->block_len;
        cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 3f9d555..2dbc969 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -189,17 +189,17 @@ static struct extent_map *btree_get_extent(struct inode 
*inode,
 {
        struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
        struct extent_map *em;
+       struct extent_map *to_free1 = NULL, *to_free2 = NULL;
        int ret;
 
-       read_lock(&em_tree->lock);
+       rcu_read_lock();
        em = lookup_extent_mapping(em_tree, start, len);
+       rcu_read_unlock();
        if (em) {
                em->bdev =
                        BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
-               read_unlock(&em_tree->lock);
                goto out;
        }
-       read_unlock(&em_tree->lock);
 
        em = alloc_extent_map();
        if (!em) {
@@ -212,8 +212,12 @@ static struct extent_map *btree_get_extent(struct inode 
*inode,
        em->block_start = 0;
        em->bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
 
-       write_lock(&em_tree->lock);
-       ret = add_extent_mapping(em_tree, em);
+       spin_lock(&em_tree->lock);
+       ret = add_extent_mapping(em_tree, em, &to_free1, &to_free2);
+       spin_unlock(&em_tree->lock);
+       free_extent_map(to_free1);
+       free_extent_map(to_free2);
+
        if (ret == -EEXIST) {
                u64 failed_start = em->start;
                u64 failed_len = em->len;
@@ -231,7 +235,6 @@ static struct extent_map *btree_get_extent(struct inode 
*inode,
                free_extent_map(em);
                em = NULL;
        }
-       write_unlock(&em_tree->lock);
 
        if (ret)
                em = ERR_PTR(ret);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 49f3c9d..30a8270 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2013,10 +2013,10 @@ static int bio_readpage_error(struct bio *failed_bio, 
struct page *page,
                failrec->bio_flags = 0;
                failrec->in_validation = 0;
 
-               read_lock(&em_tree->lock);
+               rcu_read_lock();
                em = lookup_extent_mapping(em_tree, start, failrec->len);
+               rcu_read_unlock();
                if (!em) {
-                       read_unlock(&em_tree->lock);
                        kfree(failrec);
                        return -EIO;
                }
@@ -2025,7 +2025,6 @@ static int bio_readpage_error(struct bio *failed_bio, 
struct page *page,
                        free_extent_map(em);
                        em = NULL;
                }
-               read_unlock(&em_tree->lock);
 
                if (!em || IS_ERR(em)) {
                        kfree(failrec);
@@ -3286,15 +3285,15 @@ int try_release_extent_mapping(struct extent_map_tree 
*map,
                u64 len;
                while (start <= end) {
                        len = end - start + 1;
-                       write_lock(&map->lock);
+                       spin_lock(&map->lock);
                        em = lookup_extent_mapping(map, start, len);
                        if (IS_ERR_OR_NULL(em)) {
-                               write_unlock(&map->lock);
+                               spin_unlock(&map->lock);
                                break;
                        }
                        if (test_bit(EXTENT_FLAG_PINNED, &em->flags) ||
                            em->start != start) {
-                               write_unlock(&map->lock);
+                               spin_unlock(&map->lock);
                                free_extent_map(em);
                                break;
                        }
@@ -3307,7 +3306,7 @@ int try_release_extent_mapping(struct extent_map_tree 
*map,
                                free_extent_map(em);
                        }
                        start = extent_map_end(em);
-                       write_unlock(&map->lock);
+                       spin_unlock(&map->lock);
 
                        /* once for us */
                        free_extent_map(em);
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 746084c..e2e8af0 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -67,7 +67,7 @@ void extent_map_tree_init(struct extent_map_tree *tree)
 {
        tree->head.start = (-1ULL);
        sl_init_list(&tree->map, &tree->head.sl_node);
-       rwlock_init(&tree->lock);
+       spin_lock_init(&tree->lock);
 }
 
 /**
@@ -100,8 +100,11 @@ struct extent_map *alloc_extent_map(void)
        return em;
 }
 
-static inline void __free_extent_map(struct extent_map *em)
+static inline void __free_extent_map(struct rcu_head *head)
 {
+       struct sl_node *node = container_of(head, struct sl_node, rcu_head);
+       struct extent_map *em = sl_entry(node, struct extent_map, sl_node);
+
 #if MAP_LEAK_DEBUG
        unsigned long flags;
 
@@ -129,7 +132,7 @@ void free_extent_map(struct extent_map *em)
 
        WARN_ON(atomic_read(&em->refs) == 0);
        if (atomic_dec_and_test(&em->refs))
-               __free_extent_map(em);
+               call_rcu(&em->sl_node.rcu_head, __free_extent_map);
 }
 
 static inline int in_entry(struct sl_node *node, u64 offset)
@@ -166,14 +169,14 @@ static struct sl_node *sl_search(struct sl_list *list, 
u64 offset,
 
        BUG_ON(!list);
        level = list->level;
-       p = list->head;
+       p = rcu_dereference(list->head);
        BUG_ON(!p);
 
        if (sl_empty(p))
                return NULL;
        do {
                while (entry = next_entry(p, level, &q), entry->start <= offset)
-                       p = q;
+                       p = rcu_dereference(q);
 
                if (in_entry(p, offset))
                        return p;
@@ -262,7 +265,9 @@ static int mergable_maps(struct extent_map *prev, struct 
extent_map *next)
        return 0;
 }
 
-static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em)
+static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em,
+                         struct extent_map **to_free1,
+                         struct extent_map **to_free2)
 {
        struct extent_map *merge = NULL;
        struct sl_node *sl;
@@ -278,7 +283,8 @@ static void try_merge_map(struct extent_map_tree *tree, 
struct extent_map *em)
                        em->block_start = merge->block_start;
                        merge->in_tree = 0;
                        sl_erase(&merge->sl_node, &tree->map);
-                       free_extent_map(merge);
+                       if (merge)
+                               *to_free1 = merge;
                }
        }
 
@@ -290,7 +296,8 @@ static void try_merge_map(struct extent_map_tree *tree, 
struct extent_map *em)
                em->block_len += merge->len;
                merge->in_tree = 0;
                sl_erase(&merge->sl_node, &tree->map);
-               free_extent_map(merge);
+               if (merge)
+                       *to_free2 = merge;
        }
 }
 
@@ -298,8 +305,9 @@ int unpin_extent_cache(struct extent_map_tree *tree, u64 
start, u64 len)
 {
        int ret = 0;
        struct extent_map *em;
+       struct extent_map *to_free1 = NULL, *to_free2 = NULL;
 
-       write_lock(&tree->lock);
+       spin_lock(&tree->lock);
        em = lookup_extent_mapping(tree, start, len);
 
        WARN_ON(!em || em->start != start);
@@ -308,11 +316,13 @@ int unpin_extent_cache(struct extent_map_tree *tree, u64 
start, u64 len)
 
        clear_bit(EXTENT_FLAG_PINNED, &em->flags);
 
-       try_merge_map(tree, em);
+       try_merge_map(tree, em, &to_free1, &to_free2);
 
        free_extent_map(em);
 out:
-       write_unlock(&tree->lock);
+       spin_unlock(&tree->lock);
+       free_extent_map(to_free1);
+       free_extent_map(to_free2);
        return ret;
 }
 
@@ -326,8 +336,9 @@ out:
  * into the tree directly, with an additional reference taken, or a
  * reference dropped if the merge attempt was successful.
  */
-int add_extent_mapping(struct extent_map_tree *tree,
-                      struct extent_map *em)
+int add_extent_mapping(struct extent_map_tree *tree, struct extent_map *em,
+                      struct extent_map **to_free1,
+                      struct extent_map **to_free2)
 {
        int ret = 0;
        struct sl_node *sl_node;
@@ -340,7 +351,7 @@ int add_extent_mapping(struct extent_map_tree *tree,
 
        atomic_inc(&em->refs);
 
-       try_merge_map(tree, em);
+       try_merge_map(tree, em, to_free1, to_free2);
 out:
        return ret;
 }
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index 6d2c247..c61a105 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -41,7 +41,7 @@ struct map_head {
 
 struct extent_map_tree {
        struct sl_list map;
-       rwlock_t lock;
+       spinlock_t lock;
        struct map_head head;
 };
 
@@ -62,8 +62,9 @@ static inline u64 extent_map_block_end(struct extent_map *em)
 void extent_map_tree_init(struct extent_map_tree *tree);
 struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
                                         u64 start, u64 len);
-int add_extent_mapping(struct extent_map_tree *tree,
-                      struct extent_map *em);
+int add_extent_mapping(struct extent_map_tree *tree, struct extent_map *em,
+                      struct extent_map **to_free1,
+                      struct extent_map **to_free2);
 int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em);
 
 struct extent_map *alloc_extent_map(void);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index cc7492c..8284202 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -435,10 +435,12 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 
start, u64 end,
        struct extent_map *em;
        struct extent_map *split = NULL;
        struct extent_map *split2 = NULL;
+       struct extent_map *to_free[4];
        struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
        u64 len = end - start + 1;
        int ret;
        int testend = 1;
+       int i;
        unsigned long flags;
        int compressed = 0;
 
@@ -454,24 +456,27 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 
start, u64 end,
                        split2 = alloc_extent_map();
                BUG_ON(!split || !split2);
 
-               write_lock(&em_tree->lock);
+               for (i = 0; i < 4; i++)
+                       to_free[i] = NULL;
+               spin_lock(&em_tree->lock);
                em = lookup_extent_mapping(em_tree, start, len);
                if (!em) {
-                       write_unlock(&em_tree->lock);
+                       spin_unlock(&em_tree->lock);
                        break;
                }
+
                flags = em->flags;
                if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) {
                        if (testend && em->start + em->len >= start + len) {
                                free_extent_map(em);
-                               write_unlock(&em_tree->lock);
+                               spin_unlock(&em_tree->lock);
                                break;
                        }
                        start = em->start + em->len;
                        if (testend)
                                len = start + len - (em->start + em->len);
                        free_extent_map(em);
-                       write_unlock(&em_tree->lock);
+                       spin_unlock(&em_tree->lock);
                        continue;
                }
                compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
@@ -493,7 +498,8 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, 
u64 end,
                        split->bdev = em->bdev;
                        split->flags = flags;
                        split->compress_type = em->compress_type;
-                       ret = add_extent_mapping(em_tree, split);
+                       ret = add_extent_mapping(em_tree, split, &to_free[0],
+                                                &to_free[1]);
                        BUG_ON(ret);
                        free_extent_map(split);
                        split = split2;
@@ -519,12 +525,15 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 
start, u64 end,
                                split->orig_start = split->start;
                        }
 
-                       ret = add_extent_mapping(em_tree, split);
+                       ret = add_extent_mapping(em_tree, split, &to_free[2],
+                                                &to_free[3]);
                        BUG_ON(ret);
                        free_extent_map(split);
                        split = NULL;
                }
-               write_unlock(&em_tree->lock);
+               spin_unlock(&em_tree->lock);
+               for (i = 0; i < 4; i++)
+                       free_extent_map(to_free[i]);
 
                /* once for us */
                free_extent_map(em);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 13b0542..d896b39 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -573,6 +573,7 @@ static noinline int submit_compressed_extents(struct inode 
*inode,
        struct btrfs_trans_handle *trans;
        struct btrfs_key ins;
        struct extent_map *em;
+       struct extent_map *to_free1 = NULL, *to_free2 = NULL;
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
        struct extent_io_tree *io_tree;
@@ -675,9 +676,12 @@ retry:
                set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
 
                while (1) {
-                       write_lock(&em_tree->lock);
-                       ret = add_extent_mapping(em_tree, em);
-                       write_unlock(&em_tree->lock);
+                       spin_lock(&em_tree->lock);
+                       ret = add_extent_mapping(em_tree, em, &to_free1,
+                                                &to_free2);
+                       spin_unlock(&em_tree->lock);
+                       free_extent_map(to_free1);
+                       free_extent_map(to_free2);
                        if (ret != -EEXIST) {
                                free_extent_map(em);
                                break;
@@ -732,8 +736,9 @@ static u64 get_extent_allocation_hint(struct inode *inode, 
u64 start,
        struct extent_map *em;
        u64 alloc_hint = 0;
 
-       read_lock(&em_tree->lock);
+       rcu_read_lock();
        em = search_extent_mapping(em_tree, start, num_bytes);
+       rcu_read_unlock();
        if (em) {
                /*
                 * if block start isn't an actual block number then find the
@@ -752,7 +757,6 @@ static u64 get_extent_allocation_hint(struct inode *inode, 
u64 start,
                        free_extent_map(em);
                }
        }
-       read_unlock(&em_tree->lock);
 
        return alloc_hint;
 }
@@ -786,6 +790,7 @@ static noinline int cow_file_range(struct inode *inode,
        u64 blocksize = root->sectorsize;
        struct btrfs_key ins;
        struct extent_map *em;
+       struct extent_map *to_free1 = NULL, *to_free2 = NULL;
        struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
        int ret = 0;
 
@@ -854,9 +859,12 @@ static noinline int cow_file_range(struct inode *inode,
                set_bit(EXTENT_FLAG_PINNED, &em->flags);
 
                while (1) {
-                       write_lock(&em_tree->lock);
-                       ret = add_extent_mapping(em_tree, em);
-                       write_unlock(&em_tree->lock);
+                       spin_lock(&em_tree->lock);
+                       ret = add_extent_mapping(em_tree, em, &to_free1,
+                                                &to_free2);
+                       spin_unlock(&em_tree->lock);
+                       free_extent_map(to_free1);
+                       free_extent_map(to_free2);
                        if (ret != -EEXIST) {
                                free_extent_map(em);
                                break;
@@ -1195,6 +1203,7 @@ out_check:
 
                if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
                        struct extent_map *em;
+                       struct extent_map *to_free1 = NULL, *to_free2 = NULL;
                        struct extent_map_tree *em_tree;
                        em_tree = &BTRFS_I(inode)->extent_tree;
                        em = alloc_extent_map();
@@ -1207,9 +1216,12 @@ out_check:
                        em->bdev = root->fs_info->fs_devices->latest_bdev;
                        set_bit(EXTENT_FLAG_PINNED, &em->flags);
                        while (1) {
-                               write_lock(&em_tree->lock);
-                               ret = add_extent_mapping(em_tree, em);
-                               write_unlock(&em_tree->lock);
+                               spin_lock(&em_tree->lock);
+                               ret = add_extent_mapping(em_tree, em, &to_free1,
+                                                        &to_free2);
+                               spin_unlock(&em_tree->lock);
+                               free_extent_map(to_free1);
+                               free_extent_map(to_free2);
                                if (ret != -EEXIST) {
                                        free_extent_map(em);
                                        break;
@@ -4862,7 +4874,9 @@ out_fail:
 static int merge_extent_mapping(struct extent_map_tree *em_tree,
                                struct extent_map *existing,
                                struct extent_map *em,
-                               u64 map_start, u64 map_len)
+                               u64 map_start, u64 map_len,
+                               struct extent_map **to_free1,
+                               struct extent_map **to_free2)
 {
        u64 start_diff;
 
@@ -4875,7 +4889,7 @@ static int merge_extent_mapping(struct extent_map_tree 
*em_tree,
                em->block_start += start_diff;
                em->block_len -= start_diff;
        }
-       return add_extent_mapping(em_tree, em);
+       return add_extent_mapping(em_tree, em, to_free1, to_free2);
 }
 
 static noinline int uncompress_inline(struct btrfs_path *path,
@@ -4944,17 +4958,19 @@ struct extent_map *btrfs_get_extent(struct inode 
*inode, struct page *page,
        struct extent_buffer *leaf;
        struct btrfs_key found_key;
        struct extent_map *em = NULL;
+       struct extent_map *to_free[4];
        struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
        struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
        struct btrfs_trans_handle *trans = NULL;
        int compress_type;
+       int i;
 
 again:
-       read_lock(&em_tree->lock);
+       rcu_read_lock();
        em = lookup_extent_mapping(em_tree, start, len);
        if (em)
                em->bdev = root->fs_info->fs_devices->latest_bdev;
-       read_unlock(&em_tree->lock);
+       rcu_read_unlock();
 
        if (em) {
                if (em->start > start || em->start + em->len <= start)
@@ -5166,8 +5182,10 @@ insert:
        }
 
        err = 0;
-       write_lock(&em_tree->lock);
-       ret = add_extent_mapping(em_tree, em);
+       for (i = 0; i < 4; i++)
+               to_free[i] = NULL;
+       spin_lock(&em_tree->lock);
+       ret = add_extent_mapping(em_tree, em, &to_free[0], &to_free[1]);
        /* it is possible that someone inserted the extent into the tree
         * while we had the lock dropped.  It is also possible that
         * an overlapping map exists in the tree
@@ -5189,7 +5207,9 @@ insert:
                        if (existing) {
                                err = merge_extent_mapping(em_tree, existing,
                                                           em, start,
-                                                          root->sectorsize);
+                                                          root->sectorsize,
+                                                          &to_free[2],
+                                                          &to_free[3]);
                                free_extent_map(existing);
                                if (err) {
                                        free_extent_map(em);
@@ -5206,7 +5226,9 @@ insert:
                        err = 0;
                }
        }
-       write_unlock(&em_tree->lock);
+       spin_unlock(&em_tree->lock);
+       for (i = 0; i < 4; i++)
+               free_extent_map(to_free[i]);
 out:
 
        trace_btrfs_get_extent(root, em);
@@ -5414,9 +5436,12 @@ static struct extent_map *btrfs_new_extent_direct(struct 
inode *inode,
        set_bit(EXTENT_FLAG_PINNED, &em->flags);
 
        while (insert) {
-               write_lock(&em_tree->lock);
-               ret = add_extent_mapping(em_tree, em);
-               write_unlock(&em_tree->lock);
+               struct extent_map *to_free1 = NULL, *to_free2 = NULL;
+               spin_lock(&em_tree->lock);
+               ret = add_extent_mapping(em_tree, em, &to_free1, &to_free2);
+               spin_unlock(&em_tree->lock);
+               free_extent_map(to_free1);
+               free_extent_map(to_free2);
                if (ret != -EEXIST)
                        break;
                btrfs_drop_extent_cache(inode, start, start + em->len - 1, 0);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index c04f02c..83fc601 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -673,9 +673,9 @@ static int check_defrag_in_cache(struct inode *inode, u64 
offset, int thresh)
        struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
        u64 end;
 
-       read_lock(&em_tree->lock);
+       rcu_read_lock();
        em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE);
-       read_unlock(&em_tree->lock);
+       rcu_read_unlock();
 
        if (em) {
                end = extent_map_end(em);
@@ -782,9 +782,9 @@ static int should_defrag_range(struct inode *inode, u64 
start, u64 len,
         * hopefully we have this extent in the tree already, try without
         * the full extent lock
         */
-       read_lock(&em_tree->lock);
+       rcu_read_lock();
        em = lookup_extent_mapping(em_tree, start, len);
-       read_unlock(&em_tree->lock);
+       rcu_read_unlock();
 
        if (!em) {
                /* get the big lock and read metadata off disk */
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index cfb5543..b92d207 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -2884,6 +2884,7 @@ int setup_extent_mapping(struct inode *inode, u64 start, 
u64 end,
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
        struct extent_map *em;
+       struct extent_map *to_free1 = NULL, *to_free2 = NULL;
        int ret = 0;
 
        em = alloc_extent_map();
@@ -2899,9 +2900,11 @@ int setup_extent_mapping(struct inode *inode, u64 start, 
u64 end,
 
        lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
        while (1) {
-               write_lock(&em_tree->lock);
-               ret = add_extent_mapping(em_tree, em);
-               write_unlock(&em_tree->lock);
+               spin_lock(&em_tree->lock);
+               ret = add_extent_mapping(em_tree, em, &to_free1, &to_free2);
+               spin_unlock(&em_tree->lock);
+               free_extent_map(to_free1);
+               free_extent_map(to_free2);
                if (ret != -EEXIST) {
                        free_extent_map(em);
                        break;
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index ddf2c90..5aec748 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -1374,9 +1374,9 @@ static noinline_for_stack int scrub_chunk(struct 
scrub_dev *sdev,
        int i;
        int ret = -EINVAL;
 
-       read_lock(&map_tree->map_tree.lock);
+       rcu_read_lock();
        em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1);
-       read_unlock(&map_tree->map_tree.lock);
+       rcu_read_unlock();
 
        if (!em)
                return -EINVAL;
diff --git a/fs/btrfs/skiplist.c b/fs/btrfs/skiplist.c
index c803478..1069922 100644
--- a/fs/btrfs/skiplist.c
+++ b/fs/btrfs/skiplist.c
@@ -62,7 +62,7 @@ inline void sl_link_node(struct sl_node *node, struct sl_node 
**backlook,
 
                node->next[i] = q;
                node->prev[i] = p;
-               p->next[i] = node;
+               rcu_assign_pointer(p->next[i], node);
                q->prev[i] = node;
 
                i++;
@@ -78,11 +78,11 @@ void sl_erase(struct sl_node *node, struct sl_list *list)
 
        level = node->level;
 
-       for (i = 0; i <= level; i++) {
+       for (i = level; i >= 0; i--) {
                prev = node->prev[i];
                next = node->next[i];
 
-               prev->next[i] = next;
+               rcu_assign_pointer(prev->next[i], next);
                next->prev[i] = prev;
                node->next[i] = node;
                node->prev[i] = node;
diff --git a/fs/btrfs/skiplist.h b/fs/btrfs/skiplist.h
index 3e414b5..2ae997d 100644
--- a/fs/btrfs/skiplist.h
+++ b/fs/btrfs/skiplist.h
@@ -102,41 +102,48 @@ struct sl_node *sl_insert_node(struct sl_list *list, u64 
offset,
 #define _SKIPLIST_H
 
 #include <linux/random.h>
+#include <linux/rcupdate.h>
 
 #define MAXLEVEL 16
 /* double p = 0.25; */
 
 struct sl_node {
-       struct sl_node **next;
-       struct sl_node **prev;
+       struct sl_node __rcu **next;
+       struct sl_node __rcu **prev;
+       struct rcu_head rcu_head;
        unsigned int level;
        unsigned int head:1;
 };
 
 struct sl_list {
-       struct sl_node *head;
-       struct sl_node *h_next[MAXLEVEL];
-       struct sl_node *h_prev[MAXLEVEL];
+       struct sl_node __rcu *head;
+       struct sl_node __rcu *h_next[MAXLEVEL];
+       struct sl_node __rcu *h_prev[MAXLEVEL];
        unsigned int level;
 };
 
-#define sl_entry(ptr, type, member) container_of(ptr, type, member)
+#define sl_entry(ptr, type, member) \
+       ({ \
+               typeof(*ptr) __rcu *__ptr = (typeof(*ptr) __rcu __force *)ptr; \
+               container_of((typeof(ptr))rcu_dereference(__ptr), \
+                            type, member); \
+       })
 
 static inline int sl_empty(const struct sl_node *head)
 {
-       return head->next[0] == head;
+       return (rcu_dereference(head->next[0]) == head);
 }
 
 static inline struct sl_node *__sl_next_with_level(struct sl_node *node,
                                                   int level)
 {
-       return node->next[level];
+       return rcu_dereference(node->next[level]);
 }
 
 static inline struct sl_node *__sl_prev_with_level(struct sl_node *node,
                                                   int level)
 {
-       return node->prev[level];
+       return rcu_dereference(node->prev[level]);
 }
 
 static inline struct sl_node *sl_next(struct sl_node *node)
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index adaac9e..c41502d 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1955,9 +1955,9 @@ static int btrfs_relocate_chunk(struct btrfs_root *root,
         * step two, delete the device extents and the
         * chunk tree entries
         */
-       read_lock(&em_tree->lock);
+       rcu_read_lock();
        em = lookup_extent_mapping(em_tree, chunk_offset, 1);
-       read_unlock(&em_tree->lock);
+       rcu_read_unlock();
 
        BUG_ON(em->start > chunk_offset ||
               em->start + em->len < chunk_offset);
@@ -1988,9 +1988,9 @@ static int btrfs_relocate_chunk(struct btrfs_root *root,
        ret = btrfs_remove_block_group(trans, extent_root, chunk_offset);
        BUG_ON(ret);
 
-       write_lock(&em_tree->lock);
+       spin_lock(&em_tree->lock);
        remove_extent_mapping(em_tree, em);
-       write_unlock(&em_tree->lock);
+       spin_unlock(&em_tree->lock);
 
        kfree(map);
        em->bdev = NULL;
@@ -2378,6 +2378,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle 
*trans,
        struct map_lookup *map = NULL;
        struct extent_map_tree *em_tree;
        struct extent_map *em;
+       struct extent_map *to_free1 = NULL, *to_free2 = NULL;
        struct btrfs_device_info *devices_info = NULL;
        u64 total_avail;
        int num_stripes;        /* total number of stripes to allocate */
@@ -2589,9 +2590,11 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle 
*trans,
        em->block_len = em->len;
 
        em_tree = &extent_root->fs_info->mapping_tree.map_tree;
-       write_lock(&em_tree->lock);
-       ret = add_extent_mapping(em_tree, em);
-       write_unlock(&em_tree->lock);
+       spin_lock(&em_tree->lock);
+       ret = add_extent_mapping(em_tree, em, &to_free1, &to_free2);
+       spin_unlock(&em_tree->lock);
+       free_extent_map(to_free1);
+       free_extent_map(to_free2);
        BUG_ON(ret);
        free_extent_map(em);
 
@@ -2800,9 +2803,9 @@ int btrfs_chunk_readonly(struct btrfs_root *root, u64 
chunk_offset)
        int readonly = 0;
        int i;
 
-       read_lock(&map_tree->map_tree.lock);
+       rcu_read_lock();
        em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1);
-       read_unlock(&map_tree->map_tree.lock);
+       rcu_read_unlock();
        if (!em)
                return 1;
 
@@ -2854,9 +2857,9 @@ int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, 
u64 logical, u64 len)
        struct extent_map_tree *em_tree = &map_tree->map_tree;
        int ret;
 
-       read_lock(&em_tree->lock);
+       rcu_read_lock();
        em = lookup_extent_mapping(em_tree, logical, len);
-       read_unlock(&em_tree->lock);
+       rcu_read_unlock();
        BUG_ON(!em);
 
        BUG_ON(em->start > logical || em->start + em->len < logical);
@@ -2921,9 +2924,9 @@ again:
                atomic_set(&bbio->error, 0);
        }
 
-       read_lock(&em_tree->lock);
+       rcu_read_lock();
        em = lookup_extent_mapping(em_tree, logical, *length);
-       read_unlock(&em_tree->lock);
+       rcu_read_unlock();
 
        if (!em) {
                printk(KERN_CRIT "unable to find logical %llu len %llu\n",
@@ -3187,9 +3190,9 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
        u64 stripe_nr;
        int i, j, nr = 0;
 
-       read_lock(&em_tree->lock);
+       rcu_read_lock();
        em = lookup_extent_mapping(em_tree, chunk_start, 1);
-       read_unlock(&em_tree->lock);
+       rcu_read_unlock();
 
        BUG_ON(!em || em->start != chunk_start);
        map = (struct map_lookup *)em->bdev;
@@ -3461,6 +3464,7 @@ static int read_one_chunk(struct btrfs_root *root, struct 
btrfs_key *key,
        struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
        struct map_lookup *map;
        struct extent_map *em;
+       struct extent_map *to_free1 = NULL, *to_free2 = NULL;
        u64 logical;
        u64 length;
        u64 devid;
@@ -3472,9 +3476,9 @@ static int read_one_chunk(struct btrfs_root *root, struct 
btrfs_key *key,
        logical = key->offset;
        length = btrfs_chunk_length(leaf, chunk);
 
-       read_lock(&map_tree->map_tree.lock);
+       rcu_read_lock();
        em = lookup_extent_mapping(&map_tree->map_tree, logical, 1);
-       read_unlock(&map_tree->map_tree.lock);
+       rcu_read_unlock();
 
        /* already mapped? */
        if (em && em->start <= logical && em->start + em->len > logical) {
@@ -3533,9 +3537,11 @@ static int read_one_chunk(struct btrfs_root *root, 
struct btrfs_key *key,
                map->stripes[i].dev->in_fs_metadata = 1;
        }
 
-       write_lock(&map_tree->map_tree.lock);
-       ret = add_extent_mapping(&map_tree->map_tree, em);
-       write_unlock(&map_tree->map_tree.lock);
+       spin_lock(&map_tree->map_tree.lock);
+       ret = add_extent_mapping(&map_tree->map_tree, em, &to_free1, &to_free2);
+       spin_unlock(&map_tree->map_tree.lock);
+       free_extent_map(to_free1);
+       free_extent_map(to_free2);
        BUG_ON(ret);
        free_extent_map(em);
 
-- 
1.6.5.2

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to