[PATCH V20 01/19] Btrfs: subpage-blocksize: Fix whole page read.
For the subpage-blocksize scenario, a page can contain multiple
blocks. In such cases, this patch handles reading data from files.

To track the status of individual blocks of a page, this patch makes use
of a bitmap pointed to by the newly introduced per-page 'struct
btrfs_page_private'.

The per-page btrfs_page_private->io_lock plays the same role as
BH_Uptodate_Lock (see end_buffer_async_read()), i.e. without the io_lock
we may end up in the following situation.

NOTE: Assume 64k page size and 4k block size. Also assume that the first
12 blocks of the page are contiguous while the next 4 blocks are
contiguous. When reading the page we end up submitting two "logical
address space" bios. So the end_bio_extent_readpage function is invoked
twice, once for each bio.

|----------------------------+----------------------------+-------------|
| Task A                     | Task B                     | Task C      |
|----------------------------+----------------------------+-------------|
| end_bio_extent_readpage    |                            |             |
| process block 0            |                            |             |
|  - clear BLK_STATE_IO      |                            |             |
|  - page_read_complete      |                            |             |
| process block 1            |                            |             |
|                            |                            |             |
|                            |                            |             |
|                            | end_bio_extent_readpage    |             |
|                            | process block 0            |             |
|                            |  - clear BLK_STATE_IO      |             |
|                            |  - page_read_complete      |             |
|                            | process block 1            |             |
|                            |                            |             |
| process block 11           | process block 3            |             |
|  - clear BLK_STATE_IO      |  - clear BLK_STATE_IO      |             |
|  - page_read_complete      |  - page_read_complete      |             |
|    - returns true          |    - returns true          |             |
|  - unlock_page()           |                            |             |
|                            |                            | lock_page() |
|                            |  - unlock_page()           |             |
|----------------------------+----------------------------+-------------|

We end up incorrectly unlocking the page twice and "Task C" ends up
working on an unlocked page. So private->io_lock makes sure that only
one of the tasks gets "true" as the return value when page_io_complete()
is invoked. As an optimization the patch gets the io_lock only when the
last block of the bio_vec is being processed.

Signed-off-by: Chandan Rajendra
---
 fs/btrfs/extent_io.c | 371 ---
 fs/btrfs/extent_io.h |  74 +-
 fs/btrfs/inode.c     |  16 +--
 3 files changed, 338 insertions(+), 123 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index e197d47..a349f99 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -24,6 +24,7 @@
 
 static struct kmem_cache *extent_state_cache;
 static struct kmem_cache *extent_buffer_cache;
+static struct kmem_cache *page_private_cache;
 static struct bio_set *btrfs_bioset;
 
 static inline bool extent_state_in_tree(const struct extent_state *state)
@@ -174,10 +175,16 @@ int __init extent_io_init(void)
 	if (!extent_buffer_cache)
 		goto free_state_cache;
 
+	page_private_cache = kmem_cache_create("btrfs_page_private",
+			sizeof(struct btrfs_page_private), 0,
+			SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
+	if (!page_private_cache)
+		goto free_buffer_cache;
+
 	btrfs_bioset = bioset_create(BIO_POOL_SIZE,
 			offsetof(struct btrfs_io_bio, bio));
 	if (!btrfs_bioset)
-		goto free_buffer_cache;
+		goto free_page_private_cache;
 
 	if (bioset_integrity_create(btrfs_bioset, BIO_POOL_SIZE))
 		goto free_bioset;
@@ -188,6 +195,10 @@ free_bioset:
 	bioset_free(btrfs_bioset);
 	btrfs_bioset = NULL;
 
+free_page_private_cache:
+	kmem_cache_destroy(page_private_cache);
+	page_private_cache = NULL;
+
 free_buffer_cache:
 	kmem_cache_destroy(extent_buffer_cache);
 	extent_buffer_cache = NULL;
@@ -1323,6 +1334,95 @@ int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
 				  changeset);
 }
 
+static int modify_page_blks_state(struct page *page,
+				unsigned long blk_states,
+				u64 start, u64 end, int set)
+{
+	struct inode *inode = page->mapping->host;
+	unsigned long *bitmap;
+	unsigned long first_state;
+	unsigned long state;
+	u64 nr_blks;
+	u64 blk;
+
+	ASSERT(BTRFS_I(ino
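The io_lock protocol described in the commit message can be made concrete
with a small sketch. This is not the patch's code: only io_lock, the
BLK_STATE_IO bit and page_read_complete() are named by the commit message,
the real bitmap tracks several states per block, and every other identifier
below (BLKS_PER_PAGE, io_bitmap, end_block_read(), last_blk_of_vec) is an
illustrative assumption for 64k pages with 4k blocks.

/*
 * Minimal sketch of the io_lock protocol, assuming 64k pages and 4k
 * blocks. The real bitmap tracks several states per block; this
 * sketch keeps only the I/O-pending bit.
 */
#include <linux/bitmap.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>

#define BLKS_PER_PAGE	16			/* 64k page / 4k block */

struct btrfs_page_private {
	spinlock_t io_lock;
	/* one BLK_STATE_IO bit per block of the page */
	DECLARE_BITMAP(io_bitmap, BLKS_PER_PAGE);
};

/* True iff no block of the page still has read I/O in flight. */
static bool page_read_complete(struct btrfs_page_private *pg_private)
{
	return bitmap_empty(pg_private->io_bitmap, BLKS_PER_PAGE);
}

/* Called from end_bio_extent_readpage() for each block of a bio_vec. */
static void end_block_read(struct page *page, unsigned int blk,
			   bool last_blk_of_vec)
{
	struct btrfs_page_private *pg_private = (void *)page->private;
	unsigned long flags;
	bool unlock;

	if (!last_blk_of_vec) {
		/* clear_bit() is atomic; no need for io_lock here */
		clear_bit(blk, pg_private->io_bitmap);
		return;
	}

	/*
	 * For the last block of the bio_vec, clear the bit and test the
	 * bitmap under io_lock so that exactly one of the completing
	 * tasks observes the bitmap going empty and unlocks the page.
	 */
	spin_lock_irqsave(&pg_private->io_lock, flags);
	clear_bit(blk, pg_private->io_bitmap);
	unlock = page_read_complete(pg_private);
	spin_unlock_irqrestore(&pg_private->io_lock, flags);

	if (unlock)
		unlock_page(page);
}

Under this scheme, in the Task A/Task B schedule above both tasks clear
their final BLK_STATE_IO bit and test the bitmap while holding io_lock, so
only the task whose test runs after every bit is gone sees
page_read_complete() return true; the page is unlocked exactly once and
Task C never runs on an unlocked page.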
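A similar sketch helps with the bitmap arithmetic behind
modify_page_blks_state(), whose opening lines end the hunk above. Only the
(page, blk_states, start, end, set) signature comes from the patch;
BLK_NR_STATE, the flattened helper below and its parameters are
assumptions.

/*
 * Sketch of mapping a byte range [start, end] (both inclusive) onto
 * per-block state bits, assuming BLK_NR_STATE state bits are kept
 * per block of the page.
 */
#include <linux/bitops.h>
#include <linux/mm.h>
#include <linux/types.h>

#define BLK_NR_STATE	2	/* e.g. BLK_STATE_IO, BLK_STATE_UPTODATE */

static void modify_blks_state_sketch(unsigned long *bitmap,
				     unsigned long blk_states,
				     u64 start, u64 end,
				     unsigned int blocksize_bits, int set)
{
	/* first block of the range, relative to the page */
	u64 blk = (start & (PAGE_SIZE - 1)) >> blocksize_bits;
	/* number of blocks covered by [start, end] */
	u64 nr_blks = ((end - start) >> blocksize_bits) + 1;
	unsigned long state;

	while (nr_blks--) {
		/* apply every state bit requested in blk_states */
		for_each_set_bit(state, &blk_states, BLK_NR_STATE) {
			if (set)
				set_bit(blk * BLK_NR_STATE + state, bitmap);
			else
				clear_bit(blk * BLK_NR_STATE + state, bitmap);
		}
		blk++;
	}
}

For example, with 4k blocks (blocksize_bits = 12) a bio_vec covering bytes
16384..32767 of a 64k page maps to blocks 4..7, i.e. bits
BLK_NR_STATE * 4 through BLK_NR_STATE * 8 - 1 of the bitmap.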
Re: [PATCH V20 01/19] Btrfs: subpage-blocksize: Fix whole page read.
On 07/04/2016 12:34 AM, Chandan Rajendra wrote:
> For the subpage-blocksize scenario, a page can contain multiple
> blocks. In such cases, this patch handles reading data from files.
>
> [ full patch quoted above; trimmed ]
Re: [PATCH V20 01/19] Btrfs: subpage-blocksize: Fix whole page read.
On Tuesday, July 26, 2016 12:11:49 PM Josef Bacik wrote:
> On 07/04/2016 12:34 AM, Chandan Rajendra wrote:
> > For the subpage-blocksize scenario, a page can contain multiple
> > blocks. In such cases, this patch handles reading data from files.
> >
> > [ full patch quoted above; trimmed ]