If the PF_MEMALLOC flag is set, an allocation with __GFP_FS cannot enter the fs layer, because PF_MEMALLOC forbids direct reclaim.
E.g. zswap_frontswap_store() uses GFP_KERNEL and is called during reclaim, possibly in a NOFS context, but __GFP_FS is fine there because PF_MEMALLOC protects us from deadlock. WARNING: at mm/slub.c:1250 slab_pre_alloc_hook.isra.42.part.43+0x15/0x17() ... Call Trace: [<ffffffff8163413f>] dump_stack+0x19/0x1b [<ffffffff8107b620>] warn_slowpath_common+0x70/0xb0 [<ffffffff8107b76a>] warn_slowpath_null+0x1a/0x20 [<ffffffff816311c8>] slab_pre_alloc_hook.isra.42.part.43+0x15/0x17 [<ffffffff811d7235>] kmem_cache_alloc+0x55/0x220 [<ffffffff811c4a09>] zswap_frontswap_store+0xe9/0x320 [<ffffffff811c3d5b>] __frontswap_store+0x7b/0x110 [<ffffffff811bf6a3>] swap_writepage+0x23/0x80 [<ffffffff8118fff2>] shrink_page_list+0x4b2/0xa80 [<ffffffff81190c1b>] shrink_inactive_list+0x1fb/0x6c0 [<ffffffff811918b5>] shrink_lruvec+0x395/0x800 [<ffffffff81191e0f>] shrink_zone+0xef/0x2d0 [<ffffffff81192390>] do_try_to_free_pages+0x170/0x530 [<ffffffff81192825>] try_to_free_pages+0xd5/0x160 [<ffffffff81185aa5>] __alloc_pages_nodemask+0x8a5/0xc10 [<ffffffff811cb1da>] alloc_pages_current+0xaa/0x170 [<ffffffff811803de>] __get_free_pages+0xe/0x50 [<ffffffffa029a5aa>] jbd2_alloc+0x8a/0x90 [jbd2] [<ffffffffa028fab4>] do_get_write_access+0x1d4/0x4d0 [jbd2] [<ffffffffa028fdd7>] jbd2_journal_get_write_access+0x27/0x40 [jbd2] [<ffffffffa02e8e0b>] __ext4_journal_get_write_access+0x3b/0x80 [ext4] [<ffffffffa02efccd>] ext4_mb_mark_diskspace_used+0x7d/0x4f0 [ext4] [<ffffffffa02f156d>] ext4_mb_new_blocks+0x36d/0x620 [ext4] [<ffffffffa02e601d>] ext4_ext_map_blocks+0x49d/0xed0 [ext4] [<ffffffffa02b2689>] ext4_map_blocks+0x179/0x590 [ext4] [<ffffffffa02b5bc2>] ext4_writepages+0x692/0xd60 [ext4] [<ffffffff8118828e>] do_writepages+0x1e/0x40 With this change, commit d89e689bd0e8 ("mm/zswap: fix potential deadlock in zswap_frontswap_store()") is no longer required and can be reverted. 
Signed-off-by: Andrey Ryabinin <aryabi...@virtuozzo.com> --- mm/page_alloc.c | 3 ++- mm/slab.c | 6 ++++-- mm/slub.c | 3 ++- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index afac33e..2607756 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3155,7 +3155,8 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, lockdep_trace_alloc(gfp_mask); might_sleep_if(gfp_mask & __GFP_WAIT); - WARN_ON_ONCE((gfp_mask & __GFP_FS) && current->journal_info); + WARN_ON_ONCE((gfp_mask & __GFP_FS) && current->journal_info && + !(current->flags & PF_MEMALLOC)); if (should_fail_alloc_page(gfp_mask, order)) return NULL; diff --git a/mm/slab.c b/mm/slab.c index 4f0c22e..74944fe 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -3343,7 +3343,8 @@ slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid, flags &= gfp_allowed_mask; lockdep_trace_alloc(flags); - WARN_ON_ONCE((flags & __GFP_FS) && current->journal_info); + WARN_ON_ONCE((flags & __GFP_FS) && current->journal_info && + !(current->flags & PF_MEMALLOC)); if (slab_should_failslab(cachep, flags)) return NULL; @@ -3433,7 +3434,8 @@ slab_alloc(struct kmem_cache *cachep, gfp_t flags, unsigned long caller) flags &= gfp_allowed_mask; lockdep_trace_alloc(flags); - WARN_ON_ONCE((flags & __GFP_FS) && current->journal_info); + WARN_ON_ONCE((flags & __GFP_FS) && current->journal_info && + !(current->flags & PF_MEMALLOC)); if (slab_should_failslab(cachep, flags)) return NULL; diff --git a/mm/slub.c b/mm/slub.c index 280adf6..96a7aba 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1266,7 +1266,8 @@ static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags) flags &= gfp_allowed_mask; lockdep_trace_alloc(flags); might_sleep_if(flags & __GFP_WAIT); - WARN_ON_ONCE((flags & __GFP_FS) && current->journal_info); + WARN_ON_ONCE((flags & __GFP_FS) && current->journal_info && + !(current->flags & PF_MEMALLOC)); return should_failslab(s->object_size, flags, s->flags); } -- 2.10.2 
_______________________________________________ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel