From: Johannes Weiner <han...@cmpxchg.org>

The memalloc_use_memcg() function to override the default memcg
accounting context currently doesn't nest. But the patches to make the
loop driver cgroup-aware will end up nesting:

[   98.137605]  alloc_page_buffers+0x210/0x288
[   98.141799]  __getblk_gfp+0x1d4/0x400
[   98.145475]  ext4_read_block_bitmap_nowait+0x148/0xbc8
[   98.150628]  ext4_mb_init_cache+0x25c/0x9b0
[   98.154821]  ext4_mb_init_group+0x270/0x390
[   98.159014]  ext4_mb_good_group+0x264/0x270
[   98.163208]  ext4_mb_regular_allocator+0x480/0x798
[   98.168011]  ext4_mb_new_blocks+0x958/0x10f8
[   98.172294]  ext4_ext_map_blocks+0xec8/0x1618
[   98.176660]  ext4_map_blocks+0x1b8/0x8a0
[   98.180592]  ext4_writepages+0x830/0xf10
[   98.184523]  do_writepages+0xb4/0x198
[   98.188195]  __filemap_fdatawrite_range+0x170/0x1c8
[   98.193086]  filemap_write_and_wait_range+0x40/0xb0
[   98.197974]  ext4_punch_hole+0x4a4/0x660
[   98.201907]  ext4_fallocate+0x294/0x1190
[   98.205839]  loop_process_work+0x690/0x1100
[   98.210032]  loop_workfn+0x2c/0x110
[   98.213529]  process_one_work+0x3e0/0x648
[   98.217546]  worker_thread+0x70/0x670
[   98.221217]  kthread+0x1b8/0x1c0
[   98.224452]  ret_from_fork+0x10/0x18

where loop_process_work() sets the memcg override to the memcg that
submitted the IO request, and alloc_page_buffers() sets the override
to the memcg that instantiated the cache page, which may differ.

Make memalloc_use_memcg() return the old memcg and convert existing
users to a stacking model. Delete the unused memalloc_unuse_memcg().

Signed-off-by: Johannes Weiner <han...@cmpxchg.org>
Reviewed-by: Shakeel Butt <shake...@google.com>
Acked-by: Roman Gushchin <g...@fb.com>
Reported-by: Naresh Kamboju <naresh.kamb...@linaro.org>
---
 fs/buffer.c                          |  6 +++---
 fs/notify/fanotify/fanotify.c        |  5 +++--
 fs/notify/inotify/inotify_fsnotify.c |  5 +++--
 include/linux/sched/mm.h             | 28 +++++++++-------------------
 4 files changed, 18 insertions(+), 26 deletions(-)

diff --git a/fs/buffer.c b/fs/buffer.c
index a60f60396cfa..585416dec6a2 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -851,13 +851,13 @@ struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
        struct buffer_head *bh, *head;
        gfp_t gfp = GFP_NOFS | __GFP_ACCOUNT;
        long offset;
-       struct mem_cgroup *memcg;
+       struct mem_cgroup *memcg, *old_memcg;
 
        if (retry)
                gfp |= __GFP_NOFAIL;
 
        memcg = get_mem_cgroup_from_page(page);
-       memalloc_use_memcg(memcg);
+       old_memcg = memalloc_use_memcg(memcg);
 
        head = NULL;
        offset = PAGE_SIZE;
@@ -876,7 +876,7 @@ struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
                set_bh_page(bh, page, offset);
        }
 out:
-       memalloc_unuse_memcg();
+       memalloc_use_memcg(old_memcg);
        mem_cgroup_put(memcg);
        return head;
 /*
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 5435a40f82be..6b869d95bfb6 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -353,6 +353,7 @@ struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group,
        gfp_t gfp = GFP_KERNEL_ACCOUNT;
        struct inode *id = fanotify_fid_inode(inode, mask, data, data_type);
        const struct path *path = fsnotify_data_path(data, data_type);
+       struct mem_cgroup *old_memcg;
 
        /*
         * For queues with unlimited length lost events are not expected and
@@ -366,7 +367,7 @@ struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group,
                gfp |= __GFP_RETRY_MAYFAIL;
 
        /* Whoever is interested in the event, pays for the allocation. */
-       memalloc_use_memcg(group->memcg);
+       old_memcg = memalloc_use_memcg(group->memcg);
 
        if (fanotify_is_perm_event(mask)) {
                struct fanotify_perm_event *pevent;
@@ -451,7 +452,7 @@ struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group,
                }
        }
 out:
-       memalloc_unuse_memcg();
+       memalloc_use_memcg(old_memcg);
        return event;
 }
 
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index 2ebc89047153..52f38e6e81b7 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -69,6 +69,7 @@ int inotify_handle_event(struct fsnotify_group *group,
        int ret;
        int len = 0;
        int alloc_len = sizeof(struct inotify_event_info);
+       struct mem_cgroup *old_memcg;
 
        if (WARN_ON(fsnotify_iter_vfsmount_mark(iter_info)))
                return 0;
@@ -93,9 +94,9 @@ int inotify_handle_event(struct fsnotify_group *group,
         * trigger OOM killer in the target monitoring memcg as it may have
         * security repercussion.
         */
-       memalloc_use_memcg(group->memcg);
+       old_memcg = memalloc_use_memcg(group->memcg);
        event = kmalloc(alloc_len, GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL);
-       memalloc_unuse_memcg();
+       memalloc_use_memcg(old_memcg);
 
        if (unlikely(!event)) {
                /*
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index c49257a3b510..95e8bfb0cab1 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -316,31 +316,21 @@ static inline void memalloc_nocma_restore(unsigned int flags)
  * __GFP_ACCOUNT allocations till the end of the scope will be charged to the
  * given memcg.
  *
- * NOTE: This function is not nesting safe.
+ * NOTE: This function can nest. Users must save the return value and
+ * reset the previous value after their own charging scope is over
  */
-static inline void memalloc_use_memcg(struct mem_cgroup *memcg)
+static inline struct mem_cgroup *
+memalloc_use_memcg(struct mem_cgroup *memcg)
 {
-       WARN_ON_ONCE(current->active_memcg);
+       struct mem_cgroup *old = current->active_memcg;
        current->active_memcg = memcg;
-}
-
-/**
- * memalloc_unuse_memcg - Ends the remote memcg charging scope.
- *
- * This function marks the end of the remote memcg charging scope started by
- * memalloc_use_memcg().
- */
-static inline void memalloc_unuse_memcg(void)
-{
-       current->active_memcg = NULL;
+       return old;
 }
 #else
-static inline void memalloc_use_memcg(struct mem_cgroup *memcg)
-{
-}
-
-static inline void memalloc_unuse_memcg(void)
+static inline struct mem_cgroup *
+memalloc_use_memcg(struct mem_cgroup *memcg)
 {
+       return NULL;
 }
 #endif
 
-- 
2.24.1

Reply via email to