Instead of iterating over all cgroups, reclaim from the cgroup that
triggered the allocation first. Don't reclaim from a cgroup if doing so
causes refaults.
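
Roughly, the new ordering is as below (a simplified standalone C sketch
of the idea only, not kernel code; struct group, try_reclaim() and the
other names are made-up stand-ins for memcg and shrink_memcg()):

        /* Toy model: reclaim from the group that triggered the allocation
         * first; walk every group only if that made no progress. */
        #include <stddef.h>
        #include <stdio.h>

        struct group {
                unsigned long reclaimable;      /* pages we could free here */
        };

        /* stand-in for shrink_memcg(): returns the number of pages freed */
        static unsigned long try_reclaim(struct group *g, unsigned long want)
        {
                unsigned long got = g->reclaimable < want ? g->reclaimable : want;

                g->reclaimable -= got;
                return got;
        }

        static unsigned long reclaim(struct group *groups, size_t nr_groups,
                        struct group *trigger, unsigned long want)
        {
                unsigned long total = try_reclaim(trigger, want);
                size_t i;

                if (total)      /* the triggering group made progress, stop here */
                        return total;

                for (i = 0; i < nr_groups && total < want; i++)
                        total += try_reclaim(&groups[i], want - total);
                return total;
        }

        int main(void)
        {
                /* group 0 triggered the allocation but has nothing left,
                 * so the fallback walk reclaims from group 1 instead */
                struct group groups[2] = { { 0 }, { 64 } };

                printf("%lu\n", reclaim(groups, 2, &groups[0], 32));
                return 0;
        }

In the patch itself the "triggering" cgroup is known per node:
wakeup_kswapd() publishes the waker's memcg into pgdat->memcg with
xchg() (putting the previously stored reference), kswapd picks it up
with smp_load_acquire() in shrink_zone(), and direct reclaim takes it
from current->mm. A cgroup whose workingset refault counter moved since
the last lruvec->refaults snapshot is skipped unless we are at priority
0; only when reclaiming the target cgroup makes no progress do we fall
back to the usual mem_cgroup_iter() walk.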

https://pmc.acronis.com/browse/VSTOR-19037
Signed-off-by: Andrey Ryabinin <aryabi...@virtuozzo.com>
---
 include/linux/memcontrol.h |   5 ++
 include/linux/mmzone.h     |   1 +
 mm/vmscan.c                | 122 ++++++++++++++++++++++---------------
 3 files changed, 79 insertions(+), 49 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 575584dc1651..3dc16313a366 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -176,6 +176,11 @@ static inline void mem_cgroup_get(struct mem_cgroup *memcg)
        css_get(mem_cgroup_css(memcg));
 }
 
+static inline bool mem_cgroup_tryget(struct mem_cgroup *memcg)
+{
+       return css_tryget(mem_cgroup_css(memcg));
+}
+
 static inline void mem_cgroup_put(struct mem_cgroup *memcg)
 {
        css_put(mem_cgroup_css(memcg));
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 70e925d41445..59f53adfc1c5 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -807,6 +807,7 @@ typedef struct pglist_data {
                                           mem_hotplug_begin/end() */
        int kswapd_max_order;
        enum zone_type classzone_idx;
+       struct mem_cgroup *memcg;
 #ifdef CONFIG_NUMA_BALANCING
        /* Lock serializing the migrate rate limiting window */
        spinlock_t numabalancing_migrate_lock;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index fe651c6047db..583ba1abfc44 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2510,27 +2510,75 @@ static inline bool should_continue_reclaim(struct zone *zone,
        }
 }
 
+static bool mem_cgroup_refaults(struct zone *zone, struct mem_cgroup *memcg)
+{
+       if (memcg) {
+               unsigned long refaults = memcg_ws_activates(memcg);
+               unsigned long snapshot = mem_cgroup_zone_lruvec(zone, memcg)->refaults;
+
+               return refaults != snapshot;
+       }
+       return false;
+}
+
+static unsigned long shrink_memcg(struct zone *zone, struct scan_control *sc,
+                               struct mem_cgroup *memcg, bool is_classzone)
+{
+       struct mem_cgroup *root = sc->target_mem_cgroup;
+       struct reclaim_state *reclaim_state = current->reclaim_state;
+       unsigned long lru_pages, reclaimed;
+       bool slab_only = sc->slab_only;
+       struct lruvec *lruvec;
+
+       if (!sc->may_thrash && mem_cgroup_low(root, memcg))
+               return 0;
+
+       if (sc->priority && mem_cgroup_refaults(zone, memcg))
+               return 0;
+
+       reclaimed = sc->nr_reclaimed;
+
+       if (!slab_only) {
+               lruvec = mem_cgroup_zone_lruvec(zone, memcg);
+               sc->swappiness = mem_cgroup_swappiness(memcg);
+               shrink_lruvec(lruvec, sc, &lru_pages);
+       }
+
+       if (is_classzone) {
+               shrink_slab(sc->gfp_mask, zone_to_nid(zone),
+                       memcg, sc->priority, false);
+               if (reclaim_state) {
+                       sc->nr_reclaimed += reclaim_state->reclaimed_slab;
+                       sc->nr_scanned += reclaim_state->reclaimed_slab;
+                       reclaim_state->reclaimed_slab = 0;
+               }
+       }
+
+       return sc->nr_reclaimed - reclaimed;
+}
+
 static void shrink_zone(struct zone *zone, struct scan_control *sc,
                        bool is_classzone)
 {
        struct reclaim_state *reclaim_state = current->reclaim_state;
        unsigned long nr_reclaimed, nr_scanned;
-       gfp_t slab_gfp = sc->gfp_mask;
-       bool slab_only = sc->slab_only;
+       struct mem_cgroup *target_memcg = NULL;
+
+       if (current_is_kswapd()) {
+               target_memcg = smp_load_acquire(&zone->zone_pgdat->memcg);
+               mem_cgroup_get(target_memcg);
+       }
 
-       /* Disable fs-related IO for direct reclaim */
-       if (!sc->target_mem_cgroup &&
-           (current->flags & (PF_MEMALLOC|PF_KSWAPD)) == PF_MEMALLOC)
-               slab_gfp &= ~__GFP_FS;
+       target_memcg = target_memcg ? : get_mem_cgroup_from_mm(current->mm);
 
        do {
+               unsigned long reclaimed;
                struct mem_cgroup *root = sc->target_mem_cgroup;
                struct mem_cgroup_reclaim_cookie reclaim = {
                        .zone = zone,
                        .priority = sc->priority,
                };
-               unsigned long zone_lru_pages = 0;
-               struct mem_cgroup *memcg;
+               struct mem_cgroup *memcg = target_memcg;
                struct reclaim_stat stat = {};
 
                sc->stat = &stat;
@@ -2538,50 +2586,19 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc,
                nr_reclaimed = sc->nr_reclaimed;
                nr_scanned = sc->nr_scanned;
 
-               memcg = mem_cgroup_iter(root, NULL, &reclaim);
-               do {
-                       unsigned long lru_pages, scanned;
-                       struct lruvec *lruvec;
-
-                       if (!sc->may_thrash && mem_cgroup_low(root, memcg))
-                               continue;
-
-                       scanned = sc->nr_scanned;
+               reclaimed = shrink_memcg(zone, sc, memcg, is_classzone);
 
-                       if (!slab_only) {
-                               lruvec = mem_cgroup_zone_lruvec(zone, memcg);
-                               sc->swappiness = mem_cgroup_swappiness(memcg);
-                               shrink_lruvec(lruvec, sc, &lru_pages);
-                               zone_lru_pages += lru_pages;
-                       }
+               if (!reclaimed) {
+                       memcg = mem_cgroup_iter(root, NULL, &reclaim);
+                       do {
+                               shrink_memcg(zone, sc, memcg, is_classzone);
 
-                       if (is_classzone) {
-                               shrink_slab(slab_gfp, zone_to_nid(zone),
-                                           memcg, sc->priority, false);
-                               if (reclaim_state) {
-                                       sc->nr_reclaimed += reclaim_state->reclaimed_slab;
-                                       sc->nr_scanned += reclaim_state->reclaimed_slab;
-                                       reclaim_state->reclaimed_slab = 0;
+                               if (sc->nr_reclaimed >= sc->nr_to_reclaim) {
+                                       mem_cgroup_iter_break(root, memcg);
+                                       break;
                                }
-
-                       }
-
-                       /*
-                        * Direct reclaim and kswapd have to scan all memory
-                        * cgroups to fulfill the overall scan target for the
-                        * zone.
-                        *
-                        * Limit reclaim, on the other hand, only cares about
-                        * nr_to_reclaim pages to be reclaimed and it will
-                        * retry with decreasing priority if one round over the
-                        * whole hierarchy is not sufficient.
-                        */
-                       if (!global_reclaim(sc) &&
-                                       sc->nr_reclaimed >= sc->nr_to_reclaim) {
-                               mem_cgroup_iter_break(root, memcg);
-                               break;
-                       }
-               } while ((memcg = mem_cgroup_iter(root, memcg, &reclaim)));
+                       } while ((memcg = mem_cgroup_iter(root, memcg, &reclaim)));
+               }
 
                if (global_reclaim(sc)) {
                        /*
@@ -2649,6 +2666,8 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc,
 
        } while (should_continue_reclaim(zone, sc->nr_reclaimed - nr_reclaimed,
                                         sc->nr_scanned - nr_scanned, sc));
+
+       mem_cgroup_put(target_memcg);
 }
 
 /* Returns true if compaction should go ahead for a high-order request */
@@ -3811,6 +3830,7 @@ static int kswapd(void *p)
 void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx)
 {
        pg_data_t *pgdat;
+       struct mem_cgroup *prev_memcg;
 
        if (!populated_zone(zone))
                return;
@@ -3827,6 +3847,10 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx)
        if (zone_watermark_ok_safe(zone, order, low_wmark_pages(zone), 0, 0))
                return;
 
+       prev_memcg = xchg(&pgdat->memcg, get_mem_cgroup_from_mm(current->mm));
+       if (prev_memcg)
+               mem_cgroup_put(prev_memcg);
+
        trace_mm_vmscan_wakeup_kswapd(pgdat->node_id, zone_idx(zone), order);
        wake_up_interruptible(&pgdat->kswapd_wait);
 }
-- 
2.19.2
