We used to attach a checksum to an extent state covering a 4K range for the read endio, but we want to use a larger range for performance optimization. So instead we create a radix tree for checksums, where each item stands for the checksum of 4K of data.
Signed-off-by: Liu Bo <liubo2...@cn.fujitsu.com> --- fs/btrfs/extent_io.c | 86 ++++++++++++------------------------------------- fs/btrfs/extent_io.h | 2 + fs/btrfs/inode.c | 7 +--- 3 files changed, 24 insertions(+), 71 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index a55fbe6..e6433d4 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -109,10 +109,12 @@ void extent_io_tree_init(struct extent_io_tree *tree, { tree->state = RB_ROOT; INIT_RADIX_TREE(&tree->buffer, GFP_ATOMIC); + INIT_RADIX_TREE(&tree->csum, GFP_ATOMIC); tree->ops = NULL; tree->dirty_bytes = 0; spin_lock_init(&tree->lock); spin_lock_init(&tree->buffer_lock); + spin_lock_init(&tree->csum_lock); tree->mapping = mapping; } @@ -686,15 +688,6 @@ static void cache_state(struct extent_state *state, } } -static void uncache_state(struct extent_state **cached_ptr) -{ - if (cached_ptr && (*cached_ptr)) { - struct extent_state *state = *cached_ptr; - *cached_ptr = NULL; - free_extent_state(state); - } -} - /* * set some bits on a range in the tree. This may require allocations or * sleeping, so the gfp mask is used to indicate what is allowed. @@ -1649,56 +1642,32 @@ out: */ int set_state_private(struct extent_io_tree *tree, u64 start, u64 private) { - struct rb_node *node; - struct extent_state *state; int ret = 0; - spin_lock(&tree->lock); - /* - * this search will find all the extents that end after - * our range starts. 
- */ - node = tree_search(tree, start); - if (!node) { - ret = -ENOENT; - goto out; - } - state = rb_entry(node, struct extent_state, rb_node); - if (state->start != start) { - ret = -ENOENT; - goto out; - } - state->private = private; -out: - spin_unlock(&tree->lock); + spin_lock(&tree->csum_lock); + ret = radix_tree_insert(&tree->csum, (unsigned long)start, + (void *)((unsigned long)private << 1)); + BUG_ON(ret); + spin_unlock(&tree->csum_lock); return ret; } int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private) { - struct rb_node *node; - struct extent_state *state; - int ret = 0; + void **slot = NULL; - spin_lock(&tree->lock); - /* - * this search will find all the extents that end after - * our range starts. - */ - node = tree_search(tree, start); - if (!node) { - ret = -ENOENT; - goto out; - } - state = rb_entry(node, struct extent_state, rb_node); - if (state->start != start) { - ret = -ENOENT; - goto out; + spin_lock(&tree->csum_lock); + slot = radix_tree_lookup_slot(&tree->csum, (unsigned long)start); + if (!slot) { + spin_unlock(&tree->csum_lock); + return -ENOENT; } - *private = state->private; -out: - spin_unlock(&tree->lock); - return ret; + *private = (u64)(*slot) >> 1; + + radix_tree_delete(&tree->csum, (unsigned long)start); + spin_unlock(&tree->csum_lock); + + return 0; } /* @@ -2266,7 +2235,6 @@ static void end_bio_extent_readpage(struct bio *bio, int err) do { struct page *page = bvec->bv_page; struct extent_state *cached = NULL; - struct extent_state *state; pr_debug("end_bio_extent_readpage: bi_vcnt=%d, idx=%d, err=%d, " "mirror=%ld\n", bio->bi_vcnt, bio->bi_idx, err, @@ -2285,20 +2253,9 @@ static void end_bio_extent_readpage(struct bio *bio, int err) if (++bvec <= bvec_end) prefetchw(&bvec->bv_page->flags); - spin_lock(&tree->lock); - state = find_first_extent_bit_state(tree, start, EXTENT_LOCKED); - if (state && state->start == start) { - /* - * take a reference on the state, unlock will drop - * the ref - */ - 
cache_state(state, &cached); - } - spin_unlock(&tree->lock); - if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { ret = tree->ops->readpage_end_io_hook(page, start, end, - state); + NULL); if (ret) uptodate = 0; else @@ -2325,13 +2282,12 @@ error_handled: test_bit(BIO_UPTODATE, &bio->bi_flags); if (err) uptodate = 0; - uncache_state(&cached); continue; } if (tree->ops && tree->ops->readpage_io_failed_hook) { ret = tree->ops->readpage_io_failed_hook( bio, page, start, end, - failed_mirror, state); + failed_mirror, NULL); if (ret == 0) goto error_handled; } diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index cecc351..d85e361 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -95,10 +95,12 @@ struct extent_io_ops { struct extent_io_tree { struct rb_root state; struct radix_tree_root buffer; + struct radix_tree_root csum; struct address_space *mapping; u64 dirty_bytes; spinlock_t lock; spinlock_t buffer_lock; + spinlock_t csum_lock; struct extent_io_ops *ops; }; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index cbeb2e3..e9c4d6c 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1867,12 +1867,7 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, return 0; } - if (state && state->start == start) { - private = state->private; - ret = 0; - } else { - ret = get_state_private(io_tree, start, &private); - } + ret = get_state_private(io_tree, start, &private); kaddr = kmap_atomic(page, KM_USER0); if (ret) goto zeroit; -- 1.6.5.2 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html