There are several cases, such as resize and force_empty, where the
reclaim should not be accounted to psi; accounting it there is misleading.

We also have a module that reclaims dying memcgs in the background to
avoid accumulating too many dead memcgs, which can cause lots of trouble.
Without this patch, that reclaim makes the psi inaccuracy even worse.

Signed-off-by: Xunlei Pang <xlp...@linux.alibaba.com>
---
 include/linux/swap.h |  3 ++-
 mm/memcontrol.c      | 13 +++++++------
 mm/vmscan.c          |  9 ++++++---
 3 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 4bfb5c4ac108..74b5443877d4 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -354,7 +354,8 @@ extern int __isolate_lru_page(struct page *page, isolate_mode_t mode);
 extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
                                                  unsigned long nr_pages,
                                                  gfp_t gfp_mask,
-                                                 bool may_swap);
+                                                 bool may_swap,
+                                                 bool force_reclaim);
 extern unsigned long mem_cgroup_shrink_node(struct mem_cgroup *mem,
                                                gfp_t gfp_mask, bool noswap,
                                                pg_data_t *pgdat,
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index f1dfa651f55d..f4ec57876ada 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2237,7 +2237,8 @@ static void reclaim_high(struct mem_cgroup *memcg,
                if (page_counter_read(&memcg->memory) <= memcg->high)
                        continue;
                memcg_memory_event(memcg, MEMCG_HIGH);
-               try_to_free_mem_cgroup_pages(memcg, nr_pages, gfp_mask, true);
+               try_to_free_mem_cgroup_pages(memcg, nr_pages,
+                               gfp_mask, true, false);
        } while ((memcg = parent_mem_cgroup(memcg)));
 }
 
@@ -2330,7 +2331,7 @@ static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
        memcg_memory_event(mem_over_limit, MEMCG_MAX);
 
        nr_reclaimed = try_to_free_mem_cgroup_pages(mem_over_limit, nr_pages,
-                                                   gfp_mask, may_swap);
+                                        gfp_mask, may_swap, false);
 
        if (mem_cgroup_margin(mem_over_limit) >= nr_pages)
                goto retry;
@@ -2860,7 +2861,7 @@ static int mem_cgroup_resize_max(struct mem_cgroup *memcg,
                }
 
                if (!try_to_free_mem_cgroup_pages(memcg, 1,
-                                       GFP_KERNEL, !memsw)) {
+                                       GFP_KERNEL, !memsw, true)) {
                        ret = -EBUSY;
                        break;
                }
@@ -2993,7 +2994,7 @@ static int mem_cgroup_force_empty(struct mem_cgroup *memcg)
                        return -EINTR;
 
                progress = try_to_free_mem_cgroup_pages(memcg, 1,
-                                                       GFP_KERNEL, true);
+                                                       GFP_KERNEL, true, true);
                if (!progress) {
                        nr_retries--;
                        /* maybe some writeback is necessary */
@@ -5549,7 +5550,7 @@ static ssize_t memory_high_write(struct kernfs_open_file *of,
        nr_pages = page_counter_read(&memcg->memory);
        if (nr_pages > high)
                try_to_free_mem_cgroup_pages(memcg, nr_pages - high,
-                                            GFP_KERNEL, true);
+                                            GFP_KERNEL, true, true);
 
        memcg_wb_domain_size_changed(memcg);
        return nbytes;
@@ -5596,7 +5597,7 @@ static ssize_t memory_max_write(struct kernfs_open_file *of,
 
                if (nr_reclaims) {
                        if (!try_to_free_mem_cgroup_pages(memcg, nr_pages - max,
-                                                         GFP_KERNEL, true))
+                                               GFP_KERNEL, true, true))
                                nr_reclaims--;
                        continue;
                }
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 7acd0afdfc2a..3831848fca5a 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3212,7 +3212,8 @@ unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg,
 unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
                                           unsigned long nr_pages,
                                           gfp_t gfp_mask,
-                                          bool may_swap)
+                                          bool may_swap,
+                                          bool force_reclaim)
 {
        struct zonelist *zonelist;
        unsigned long nr_reclaimed;
@@ -3243,13 +3244,15 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
 
        trace_mm_vmscan_memcg_reclaim_begin(0, sc.gfp_mask);
 
-       psi_memstall_enter(&pflags);
+       if (!force_reclaim)
+               psi_memstall_enter(&pflags);
        noreclaim_flag = memalloc_noreclaim_save();
 
        nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
 
        memalloc_noreclaim_restore(noreclaim_flag);
-       psi_memstall_leave(&pflags);
+       if (!force_reclaim)
+               psi_memstall_leave(&pflags);
 
        trace_mm_vmscan_memcg_reclaim_end(nr_reclaimed);
 
-- 
2.14.4.44.g2045bb6

Reply via email to