[PATCHv3 29/41] ext4: make ext4_mpage_readpages() hugepage-aware

2016-09-15 Thread Kirill A. Shutemov
This patch modifies ext4_mpage_readpages() to deal with huge pages.

We read out 2M at once, so we have to alloc (HPAGE_PMD_NR *
blocks_per_page) sector_t for that. I'm not entirely happy with kmalloc
in this codepath, but don't see any other option.

Signed-off-by: Kirill A. Shutemov 
---
 fs/ext4/readpage.c | 38 --
 1 file changed, 32 insertions(+), 6 deletions(-)

diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
index a81b829d56de..6d7cbddceeb2 100644
--- a/fs/ext4/readpage.c
+++ b/fs/ext4/readpage.c
@@ -104,12 +104,12 @@ int ext4_mpage_readpages(struct address_space *mapping,
 
struct inode *inode = mapping->host;
const unsigned blkbits = inode->i_blkbits;
-   const unsigned blocks_per_page = PAGE_SIZE >> blkbits;
const unsigned blocksize = 1 << blkbits;
sector_t block_in_file;
sector_t last_block;
sector_t last_block_in_file;
-   sector_t blocks[MAX_BUF_PER_PAGE];
+   sector_t blocks_on_stack[MAX_BUF_PER_PAGE];
+   sector_t *blocks = blocks_on_stack;
unsigned page_block;
struct block_device *bdev = inode->i_sb->s_bdev;
int length;
@@ -122,8 +122,9 @@ int ext4_mpage_readpages(struct address_space *mapping,
map.m_flags = 0;
 
for (; nr_pages; nr_pages--) {
-   int fully_mapped = 1;
-   unsigned first_hole = blocks_per_page;
+   int fully_mapped = 1, nr = nr_pages;
+   unsigned blocks_per_page = PAGE_SIZE >> blkbits;
+   unsigned first_hole;
 
prefetchw(&page->flags);
if (pages) {
@@ -138,10 +139,31 @@ int ext4_mpage_readpages(struct address_space *mapping,
goto confused;
 
block_in_file = (sector_t)page->index << (PAGE_SHIFT - blkbits);
-   last_block = block_in_file + nr_pages * blocks_per_page;
+
+   if (PageTransHuge(page)) {
+   BUILD_BUG_ON(BIO_MAX_PAGES < HPAGE_PMD_NR);
+   nr = HPAGE_PMD_NR * blocks_per_page;
+   /* XXX: need a better solution ? */
+   blocks = kmalloc(sizeof(sector_t) * nr, GFP_NOFS);
+   if (!blocks) {
+   if (pages) {
+   delete_from_page_cache(page);
+   goto next_page;
+   }
+   return -ENOMEM;
+   }
+
+   blocks_per_page *= HPAGE_PMD_NR;
+   last_block = block_in_file + blocks_per_page;
+   } else {
+   blocks = blocks_on_stack;
+   last_block = block_in_file + nr * blocks_per_page;
+   }
+
last_block_in_file = (i_size_read(inode) + blocksize - 1) >> 
blkbits;
if (last_block > last_block_in_file)
last_block = last_block_in_file;
+   first_hole = blocks_per_page;
page_block = 0;
 
/*
@@ -213,6 +235,8 @@ int ext4_mpage_readpages(struct address_space *mapping,
}
}
if (first_hole != blocks_per_page) {
+   if (PageTransHuge(page))
+   goto confused;
zero_user_segment(page, first_hole << blkbits,
  PAGE_SIZE);
if (first_hole == 0) {
@@ -248,7 +272,7 @@ int ext4_mpage_readpages(struct address_space *mapping,
goto set_error_page;
}
bio = bio_alloc(GFP_KERNEL,
-   min_t(int, nr_pages, BIO_MAX_PAGES));
+   min_t(int, nr, BIO_MAX_PAGES));
if (!bio) {
if (ctx)
fscrypt_release_ctx(ctx);
@@ -289,5 +313,7 @@ int ext4_mpage_readpages(struct address_space *mapping,
BUG_ON(pages && !list_empty(pages));
if (bio)
submit_bio(bio);
+   if (blocks != blocks_on_stack)
+   kfree(blocks);
return 0;
 }
-- 
2.9.3

--
To unsubscribe from this list: send the line "unsubscribe linux-block" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCHv3 29/41] ext4: make ext4_mpage_readpages() hugepage-aware

2016-09-15 Thread Andreas Dilger
On Sep 15, 2016, at 5:55 AM, Kirill A. Shutemov 
 wrote:
> 
> This patch modifies ext4_mpage_readpages() to deal with huge pages.
> 
> We read out 2M at once, so we have to alloc (HPAGE_PMD_NR *
> blocks_per_page) sector_t for that. I'm not entirely happy with kmalloc
> in this codepath, but don't see any other option.

If you're reading 2MB from disk (possibly from disjoint blocks with seeks
in between) I don't think that the kmalloc() is going to be the limiting
performance factor.  If you are concerned about the size of the kmalloc()
causing failures when pages are fragmented (it can be 16KB for 1KB blocks
with 4KB pages), then use ext4_kvmalloc() to fall back to vmalloc() in
case kmalloc() fails.  It shouldn't fail often for 16KB allocations,
but it could in theory.

I also notice that ext4_kvmalloc() should probably use unlikely() for
the failure case, so that the uncommon vmalloc() fallback is out-of-line
in this more important codepath.  The only other callers are during mount,
so a branch misprediction is not critical.

Cheers, Andreas

> 
> Signed-off-by: Kirill A. Shutemov 
> ---
> fs/ext4/readpage.c | 38 --
> 1 file changed, 32 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
> index a81b829d56de..6d7cbddceeb2 100644
> --- a/fs/ext4/readpage.c
> +++ b/fs/ext4/readpage.c
> @@ -104,12 +104,12 @@ int ext4_mpage_readpages(struct address_space *mapping,
> 
>   struct inode *inode = mapping->host;
>   const unsigned blkbits = inode->i_blkbits;
> - const unsigned blocks_per_page = PAGE_SIZE >> blkbits;
>   const unsigned blocksize = 1 << blkbits;
>   sector_t block_in_file;
>   sector_t last_block;
>   sector_t last_block_in_file;
> - sector_t blocks[MAX_BUF_PER_PAGE];
> + sector_t blocks_on_stack[MAX_BUF_PER_PAGE];
> + sector_t *blocks = blocks_on_stack;
>   unsigned page_block;
>   struct block_device *bdev = inode->i_sb->s_bdev;
>   int length;
> @@ -122,8 +122,9 @@ int ext4_mpage_readpages(struct address_space *mapping,
>   map.m_flags = 0;
> 
>   for (; nr_pages; nr_pages--) {
> - int fully_mapped = 1;
> - unsigned first_hole = blocks_per_page;
> + int fully_mapped = 1, nr = nr_pages;
> + unsigned blocks_per_page = PAGE_SIZE >> blkbits;
> + unsigned first_hole;
> 
>   prefetchw(&page->flags);
>   if (pages) {
> @@ -138,10 +139,31 @@ int ext4_mpage_readpages(struct address_space *mapping,
>   goto confused;
> 
>   block_in_file = (sector_t)page->index << (PAGE_SHIFT - blkbits);
> - last_block = block_in_file + nr_pages * blocks_per_page;
> +
> + if (PageTransHuge(page)) {
> + BUILD_BUG_ON(BIO_MAX_PAGES < HPAGE_PMD_NR);
> + nr = HPAGE_PMD_NR * blocks_per_page;
> + /* XXX: need a better solution ? */
> + blocks = kmalloc(sizeof(sector_t) * nr, GFP_NOFS);
> + if (!blocks) {
> + if (pages) {
> + delete_from_page_cache(page);
> + goto next_page;
> + }
> + return -ENOMEM;
> + }
> +
> + blocks_per_page *= HPAGE_PMD_NR;
> + last_block = block_in_file + blocks_per_page;
> + } else {
> + blocks = blocks_on_stack;
> + last_block = block_in_file + nr * blocks_per_page;
> + }
> +
>   last_block_in_file = (i_size_read(inode) + blocksize - 1) >> 
> blkbits;
>   if (last_block > last_block_in_file)
>   last_block = last_block_in_file;
> + first_hole = blocks_per_page;
>   page_block = 0;
> 
>   /*
> @@ -213,6 +235,8 @@ int ext4_mpage_readpages(struct address_space *mapping,
>   }
>   }
>   if (first_hole != blocks_per_page) {
> + if (PageTransHuge(page))
> + goto confused;
>   zero_user_segment(page, first_hole << blkbits,
> PAGE_SIZE);
>   if (first_hole == 0) {
> @@ -248,7 +272,7 @@ int ext4_mpage_readpages(struct address_space *mapping,
>   goto set_error_page;
>   }
>   bio = bio_alloc(GFP_KERNEL,
> - min_t(int, nr_pages, BIO_MAX_PAGES));
> + min_t(int, nr, BIO_MAX_PAGES));
>   if (!bio) {
>   if (ctx)
>   fscrypt_release_ctx(ctx);
> @@ -289,5 +313,7 @@ int ext4_mpage_readpages(struct address_space *mapping

Re: [PATCHv3 29/41] ext4: make ext4_mpage_readpages() hugepage-aware

2016-09-16 Thread Kirill A. Shutemov
On Thu, Sep 15, 2016 at 02:55:11PM +0300, Kirill A. Shutemov wrote:
> This patch modifies ext4_mpage_readpages() to deal with huge pages.
> 
> We read out 2M at once, so we have to alloc (HPAGE_PMD_NR *
> blocks_per_page) sector_t for that. I'm not entirely happy with kmalloc
> in this codepath, but don't see any other option.
> 
> Signed-off-by: Kirill A. Shutemov 

0-DAY reported this:

compiler: powerpc64-linux-gnu-gcc (Debian 5.4.0-6) 5.4.0 20160609
reproduce:
wget 
https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross
 -O ~/bin/make.cross
chmod +x ~/bin/make.cross
git checkout d8bfe8f327288810a9a099b15f3c89a834d419a4
# save the attached .config to linux build tree
make.cross ARCH=powerpc

All errors (new ones prefixed by >>):

   In file included from include/linux/linkage.h:4:0,
from include/linux/kernel.h:6,
from fs/ext4/readpage.c:30:
   fs/ext4/readpage.c: In function 'ext4_mpage_readpages':
>> include/linux/compiler.h:491:38: error: call to '__compiletime_assert_144' 
>> declared with attribute error:
+BUILD_BUG_ON failed: BIO_MAX_PAGES < HPAGE_PMD_NR
 _compiletime_assert(condition, msg, __compiletime_assert_, __LINE__)
 ^
   include/linux/compiler.h:474:4: note: in definition of macro 
'__compiletime_assert'
   prefix ## suffix();\ 
  
   ^   
   include/linux/compiler.h:491:2: note: in expansion of macro 
'_compiletime_assert'
 _compiletime_assert(condition, msg, __compiletime_assert_, __LINE__)   

 ^   
   include/linux/bug.h:51:37: note: in expansion of macro 'compiletime_assert'
#define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg)  
^
   include/linux/bug.h:75:2: note: in expansion of macro 'BUILD_BUG_ON_MSG'
 BUILD_BUG_ON_MSG(condition, "BUILD_BUG_ON failed: " #condition)   
 ^  
   fs/ext4/readpage.c:144:4: note: in expansion of macro 'BUILD_BUG_ON'
   BUILD_BUG_ON(BIO_MAX_PAGES < HPAGE_PMD_NR); 
   ^  

The fixup:

diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
index 6d7cbddceeb2..75b2a7700c9a 100644
--- a/fs/ext4/readpage.c
+++ b/fs/ext4/readpage.c
@@ -140,7 +140,8 @@ int ext4_mpage_readpages(struct address_space *mapping,
 
block_in_file = (sector_t)page->index << (PAGE_SHIFT - blkbits);
 
-   if (PageTransHuge(page)) {
+   if (PageTransHuge(page) &&
+   IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE)) {
BUILD_BUG_ON(BIO_MAX_PAGES < HPAGE_PMD_NR);
nr = HPAGE_PMD_NR * blocks_per_page;
/* XXX: need a better solution ? */
-- 
 Kirill A. Shutemov
--
To unsubscribe from this list: send the line "unsubscribe linux-block" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCHv3 29/41] ext4: make ext4_mpage_readpages() hugepage-aware

2016-09-16 Thread Kirill A. Shutemov
On Thu, Sep 15, 2016 at 06:27:10AM -0600, Andreas Dilger wrote:
> On Sep 15, 2016, at 5:55 AM, Kirill A. Shutemov 
>  wrote:
> > 
> > This patch modifies ext4_mpage_readpages() to deal with huge pages.
> > 
> > We read out 2M at once, so we have to alloc (HPAGE_PMD_NR *
> > blocks_per_page) sector_t for that. I'm not entirely happy with kmalloc
> > in this codepath, but don't see any other option.
> 
> If you're reading 2MB from disk (possibly from disjoint blocks with seeks
> in between) I don't think that the kmalloc() is going to be the limiting
> performance factor.  If you are concerned about the size of the kmalloc()
> causing failures when pages are fragmented (it can be 16KB for 1KB blocks
> with 4KB pages), then use ext4_kvmalloc() to fall back to vmalloc() in
> case kmalloc() fails.  It shouldn't fail often for 16KB allocations,
> but it could in theory.

Good point. Will use ext4_kvmalloc().

> I also notice that ext4_kvmalloc() should probably use unlikely() for
> the failure case, so that the uncommon vmalloc() fallback is out-of-line
> in this more important codepath.  The only other callers are during mount,
> so a branch misprediction is not critical.

I agree. But it's out-of-scope for the patchset.

-- 
 Kirill A. Shutemov
--
To unsubscribe from this list: send the line "unsubscribe linux-block" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html