On Wed, Jun 13, 2012 at 06:19:10PM +0800, Liu Bo wrote: > we use larger extent state range for both readpages and read endio, so that > we can lock or unlock less and avoid most of split ops, then we'll reduce > write > locks taken at endio time. > > Signed-off-by: Liu Bo <liubo2...@cn.fujitsu.com> > --- > fs/btrfs/extent_io.c | 201 > +++++++++++++++++++++++++++++++++++++++++++++----- > 1 files changed, 182 insertions(+), 19 deletions(-) > > diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c > index 081fe13..bb66e3c 100644 > --- a/fs/btrfs/extent_io.c > +++ b/fs/btrfs/extent_io.c > @@ -2258,18 +2258,26 @@ static void end_bio_extent_readpage(struct bio *bio, > int err) > struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1; > struct bio_vec *bvec = bio->bi_io_vec; > struct extent_io_tree *tree; > + struct extent_state *cached = NULL; > u64 start; > u64 end; > int whole_page; > int mirror; > int ret; > + u64 up_start, up_end, un_start, un_end; > + int up_first, un_first; > + int for_uptodate[bio->bi_vcnt];
The array size depends on an argument; the compiler will handle it by dynamically expanding the stack frame. What is the expected maximum value for bi_vcnt? There's no hard limit AFAICS; the value gets incremented on each page in the bio. If the function is called from the worker thread, it's not much of a problem even for values like 128. An alternate way to avoid over-limit stack consumption is to declare an array to hold a fair number of items (e.g. 16), and fall back to kmalloc otherwise. > + int i = 0; > + > + up_start = un_start = (u64)-1; > + up_end = un_end = 0; > + up_first = un_first = 1; > > if (err) > uptodate = 0; > > do { > struct page *page = bvec->bv_page; > - struct extent_state *cached = NULL; > > pr_debug("end_bio_extent_readpage: bi_vcnt=%d, idx=%d, err=%d, " > "mirror=%ld\n", bio->bi_vcnt, bio->bi_idx, err, > @@ -2352,7 +2412,7 @@ static void end_bio_extent_readpage(struct bio *bio, > int err) > } > unlock_page(page); > } else { > - if (uptodate) { > + if (for_uptodate[i++]) { > check_page_uptodate(tree, page); > } else { > clearpageuptodate(page); > @@ -2360,6 +2420,7 @@ static void end_bio_extent_readpage(struct bio *bio, > int err) > } > check_page_locked(tree, page); > } > + ++bvec; Can you please move the i++ increments here? From reading the code it's not clear on first sight if the side effects are ok. > } while (bvec <= bvec_end); > > bio_put(bio); > @@ -2554,6 +2615,8 @@ static int __extent_read_full_page(struct > extent_io_tree *tree, > > end = page_end; > while (1) { > + if (range_lock) > + break; With range_lock set, this is equivalent to calling 2896 set_page_extent_mapped(page); (plus the cleancache code), a few lines above. Is this intended? It's actually called with '1' from a single place, process_batch_pages(). 
> lock_extent(tree, start, end); > ordered = btrfs_lookup_ordered_extent(inode, start); > if (!ordered) > @@ -3497,6 +3560,59 @@ int extent_writepages(struct extent_io_tree *tree, > return ret; > } > > +struct page_list { > + struct page *page; > + struct list_head list; > +}; > + > +static int process_batch_pages(struct extent_io_tree *tree, > + struct address_space *mapping, > + struct list_head *lock_pages, int *page_cnt, > + u64 lock_start, u64 lock_end, > + get_extent_t get_extent, struct bio **bio, > + unsigned long *bio_flags) > +{ > + u64 page_start; > + struct page_list *plist; > + > + while (1) { > + struct btrfs_ordered_extent *ordered = NULL; > + > + lock_extent(tree, lock_start, lock_end); > + page_start = lock_start; > + while (page_start < lock_end) { > + ordered = btrfs_lookup_ordered_extent(mapping->host, > + page_start); > + if (ordered) { > + page_start = ordered->file_offset; > + break; > + } > + page_start += PAGE_CACHE_SIZE; > + } > + if (!ordered) > + break; You're leaving the range [lock_start, lock_end) locked after taking this branch. Intended? > + unlock_extent(tree, lock_start, lock_end); > + btrfs_start_ordered_extent(mapping->host, ordered, 1); > + btrfs_put_ordered_extent(ordered); > + } > + > + plist = NULL; > + while (!list_empty(lock_pages)) { > + plist = list_entry(lock_pages->prev, struct page_list, list); > + > + __extent_read_full_page(tree, plist->page, get_extent, > + bio, 0, bio_flags, 1); > + page_cache_release(plist->page); > + list_del(&plist->list); > + plist->page = NULL; you can drop this assignment > + kfree(plist); > + (*page_cnt)--; > + } > + > + WARN_ON((*page_cnt)); > + return 0; > +} > + > int extent_readpages(struct extent_io_tree *tree, > struct address_space *mapping, > struct list_head *pages, unsigned nr_pages, -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html