[PATCH V20 01/19] Btrfs: subpage-blocksize: Fix whole page read.

2016-07-03 Thread Chandan Rajendra
In the subpage-blocksize scenario, a page can contain multiple
blocks. This patch handles reading data from files in such cases.

To track the status of individual blocks of a page, this patch makes use
of a bitmap pointed to by the newly introduced per-page 'struct
btrfs_page_private'.
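
Neither the structure nor the block-state bits are defined in the hunks
quoted in this message, so the following is only a minimal sketch of the
shape they plausibly take; io_lock and BLK_STATE_IO are named in this
mail, and every other name and the exact layout are assumptions:

enum blk_state {
        BLK_STATE_UPTODATE,     /* assumed */
        BLK_STATE_DIRTY,        /* assumed */
        BLK_STATE_IO,           /* named in the commit message */
        BLK_NR_STATE,
};

/*
 * Hedged sketch: each block of the page owns one group of BLK_NR_STATE
 * bits in bstate; 4096 is assumed to be the smallest supported
 * blocksize.
 */
struct btrfs_page_private {
        spinlock_t io_lock;     /* counterpart of BH_Uptodate_Lock */
        unsigned long bstate[BITS_TO_LONGS(BLK_NR_STATE *
                                           (PAGE_SIZE / 4096))];
};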

The per-page btrfs_page_private->io_lock plays the same role as
BH_Uptodate_Lock (see end_buffer_async_read()), i.e. without the io_lock
we may end up in the following situation:

NOTE: Assume 64k page size and 4k block size. Also assume that the first
12 blocks of the page are contiguous on disk while the next 4 blocks form
a separate contiguous extent. When reading the page we therefore end up
submitting two "logical address space" bios, and end_bio_extent_readpage()
is invoked twice, once for each bio.

|-------------------------+-------------------------+-------------|
| Task A                  | Task B                  | Task C      |
|-------------------------+-------------------------+-------------|
| end_bio_extent_readpage |                         |             |
| process block 0         |                         |             |
| - clear BLK_STATE_IO    |                         |             |
| - page_read_complete    |                         |             |
| process block 1         |                         |             |
|                         |                         |             |
|                         |                         |             |
|                         | end_bio_extent_readpage |             |
|                         | process block 0         |             |
|                         | - clear BLK_STATE_IO    |             |
|                         | - page_read_complete    |             |
|                         | process block 1         |             |
|                         |                         |             |
| process block 11        | process block 3         |             |
| - clear BLK_STATE_IO    | - clear BLK_STATE_IO    |             |
| - page_read_complete    | - page_read_complete    |             |
|   - returns true        |   - returns true        |             |
|   - unlock_page()       |                         |             |
|                         |                         | lock_page() |
|                         |   - unlock_page()       |             |
|-------------------------+-------------------------+-------------|

We end up incorrectly unlocking the page twice, and "Task C" ends up
working on an unlocked page. Hence private->io_lock makes sure that only
one of the tasks gets "true" as the return value when page_io_complete()
is invoked. As an optimization, the patch takes the io_lock only when the
last block of the bio_vec is being processed.
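
To make the serialization concrete, the following is a hedged sketch of
the end-io processing for one block under io_lock; modify_page_blks_state()
appears in the patch below, while test_page_blks_state() and the exact
calling convention are assumptions:

/*
 * Illustrative sketch, not the patch's exact code: clearing this
 * block's BLK_STATE_IO bit and testing "is any block of the page
 * still under I/O?" must form one atomic step under io_lock.
 * Otherwise two tasks can each clear a different block's bit, both
 * observe a fully-read page and both call unlock_page(), as in the
 * table above.
 */
static void end_block_read(struct page *page, u64 start, u64 end)
{
        struct btrfs_page_private *pg_private;
        unsigned long flags;
        bool page_done;

        pg_private = (struct btrfs_page_private *)page->private;

        spin_lock_irqsave(&pg_private->io_lock, flags);
        modify_page_blks_state(page, 1 << BLK_STATE_IO, start, end, 0);
        page_done = !test_page_blks_state(page, BLK_STATE_IO,
                                          page_offset(page),
                                          page_offset(page) + PAGE_SIZE - 1);
        spin_unlock_irqrestore(&pg_private->io_lock, flags);

        if (page_done)
                unlock_page(page);      /* exactly one task reaches this */
}

Per the optimization described above, this locked section would run only
for the last block of a bio_vec; the bio_vec's other blocks clear their
state without taking the lock.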

Signed-off-by: Chandan Rajendra 
---
 fs/btrfs/extent_io.c | 371 ++++++++++++++++++++++++++++++++++++---------
 fs/btrfs/extent_io.h |  74 ++++++++++++-
 fs/btrfs/inode.c     |  16 +--
 3 files changed, 338 insertions(+), 123 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index e197d47..a349f99 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -24,6 +24,7 @@

 static struct kmem_cache *extent_state_cache;
 static struct kmem_cache *extent_buffer_cache;
+static struct kmem_cache *page_private_cache;
 static struct bio_set *btrfs_bioset;

 static inline bool extent_state_in_tree(const struct extent_state *state)
@@ -174,10 +175,16 @@ int __init extent_io_init(void)
        if (!extent_buffer_cache)
                goto free_state_cache;

+       page_private_cache = kmem_cache_create("btrfs_page_private",
+                       sizeof(struct btrfs_page_private), 0,
+                       SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
+       if (!page_private_cache)
+               goto free_buffer_cache;
+
        btrfs_bioset = bioset_create(BIO_POOL_SIZE,
                        offsetof(struct btrfs_io_bio, bio));
        if (!btrfs_bioset)
-               goto free_buffer_cache;
+               goto free_page_private_cache;

        if (bioset_integrity_create(btrfs_bioset, BIO_POOL_SIZE))
                goto free_bioset;
@@ -188,6 +195,10 @@ free_bioset:
        bioset_free(btrfs_bioset);
        btrfs_bioset = NULL;

+free_page_private_cache:
+       kmem_cache_destroy(page_private_cache);
+       page_private_cache = NULL;
+
 free_buffer_cache:
        kmem_cache_destroy(extent_buffer_cache);
        extent_buffer_cache = NULL;
@@ -1323,6 +1334,95 @@ int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
  changeset);
 }
 
+static int modify_page_blks_state(struct page *page,
+                               unsigned long blk_states,
+                               u64 start, u64 end, int set)
+{
+       struct inode *inode = page->mapping->host;
+       unsigned long *bitmap;
+       unsigned long first_state;
+       unsigned long state;
+       u64 nr_blks;
+       u64 blk;
+
+       ASSERT(BTRFS_I(ino
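
The quoted hunk breaks off above. For orientation, a hedged sketch of
what the body of such a helper plausibly does follows; only the prototype
and the local variables above are taken from the patch, while the bit
layout (BLK_NR_STATE consecutive state bits per block, stored in the
btrfs_page_private bitmap) is the assumption sketched earlier:

static int modify_page_blks_state_sketch(struct page *page,
                                unsigned long blk_states,
                                u64 start, u64 end, int set)
{
        struct inode *inode = page->mapping->host;
        struct btrfs_page_private *pg_private;
        unsigned long *bitmap;
        unsigned long state;
        u64 nr_blks;
        u64 blk;

        pg_private = (struct btrfs_page_private *)page->private;
        bitmap = pg_private->bstate;

        /* block index within the page and number of blocks in [start, end] */
        blk = (start & (PAGE_SIZE - 1)) >> inode->i_blkbits;
        nr_blks = (end - start + 1) >> inode->i_blkbits;

        while (nr_blks--) {
                /* set or clear each requested state bit for this block */
                for_each_set_bit(state, &blk_states, BLK_NR_STATE) {
                        if (set)
                                set_bit(blk * BLK_NR_STATE + state, bitmap);
                        else
                                clear_bit(blk * BLK_NR_STATE + state, bitmap);
                }
                blk++;
        }

        return 0;
}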

Re: [PATCH V20 01/19] Btrfs: subpage-blocksize: Fix whole page read.

2016-07-26 Thread Josef Bacik

On 07/04/2016 12:34 AM, Chandan Rajendra wrote:

[snip]

Re: [PATCH V20 01/19] Btrfs: subpage-blocksize: Fix whole page read.

2016-07-27 Thread Chandan Rajendra
On Tuesday, July 26, 2016 12:11:49 PM Josef Bacik wrote:
> On 07/04/2016 12:34 AM, Chandan Rajendra wrote:
> > [snip]