There are several problems with our active list protection algorithm:

 - Relying on sc->may_thrash is wrong and also very slow.
   sc->may_thrash is set only if we reclaimed nothing after the priority
   dropped down to 0. If we reclaimed something (which could be slab,
   for example), we won't set it, so the active list protection becomes
   too strong. Instead of sc->may_thrash, use sc->may_shrink_active and
   sc->has_inactive to identify whether the memcg tree has cgroups with
   a big inactive list.

 - Anon aging. On every reclaim cycle we shrink some of the active anon
   list even if we don't want to reclaim anon. With active list
   protection, anon aging makes anon reclaim a lot more aggressive than
   page cache reclaim and leads to anon thrashing when the inactive page
   cache is low. Move aging under "if (sc->may_shrink_active)" to fix
   that.

https://pmc.acronis.com/browse/VSTOR-20859
Signed-off-by: Andrey Ryabinin <aryabi...@virtuozzo.com>
---
 mm/vmscan.c | 55 ++++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 38 insertions(+), 17 deletions(-)
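As a rough aid for review, here is a small userspace-only sketch of the retry
flow this patch gives shrink_zone(): the first pass over the memcg tree keeps
active lists protected, and only if no cgroup contributed a usable inactive
list (sc->has_inactive stays false) do we set sc->may_shrink_active and run
one more pass. Everything below (cgroup_sim, scan_control_sim,
inactive_is_low, shrink_zone_sim) is a simplified stand-in for illustration,
not the actual mm/vmscan.c code:

/* Standalone illustration only; simplified stand-ins, not kernel code. */
#include <stdbool.h>
#include <stdio.h>

struct cgroup_sim {
	long inactive_file;
	long active_file;
};

struct scan_control_sim {
	bool may_shrink_active;
	bool has_inactive;
};

/* Stand-in for inactive_list_is_low(): inactive smaller than active. */
static bool inactive_is_low(const struct cgroup_sim *cg)
{
	return cg->inactive_file < cg->active_file;
}

static void shrink_zone_sim(struct scan_control_sim *sc,
			    const struct cgroup_sim *cgs, int ncgs)
{
	bool retry;

	do {
		retry = false;
		sc->has_inactive = false;

		for (int i = 0; i < ncgs; i++) {
			bool low = inactive_is_low(&cgs[i]);

			if (!low)
				sc->has_inactive = true;

			if (low && !sc->may_shrink_active) {
				/* scan = 0: active list stays protected */
				printf("cgroup %d: protected\n", i);
				continue;
			}
			printf("cgroup %d: scanned (may_shrink_active=%d)\n",
			       i, (int)sc->may_shrink_active);
		}

		/* No cgroup had enough inactive pages: drop protection. */
		if (!sc->has_inactive && !sc->may_shrink_active) {
			sc->may_shrink_active = true;
			retry = true;
		}
	} while (retry);
}

int main(void)
{
	const struct cgroup_sim cgs[] = {
		{ .inactive_file = 10, .active_file = 1000 },
		{ .inactive_file = 20, .active_file = 5000 },
	};
	struct scan_control_sim sc = { false, false };

	shrink_zone_sim(&sc, cgs, 2);
	return 0;
}

With the sample sizes above, the first pass protects both cgroups, so the
second pass runs with may_shrink_active set and scans them.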
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 87384a4fb436..a122e4cfa1a4 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -103,6 +103,10 @@ struct scan_control {
 	/* Reclaim only slab */
 	bool slab_only;
 
+	bool may_shrink_active;
+
+	bool has_inactive;
+
 	/*
 	 * The memory cgroup that hit its limit and as a result is the
 	 * primary target of this reclaim invocation.
@@ -2043,16 +2047,11 @@ static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
 				 struct scan_control *sc)
 {
 	if (is_active_lru(lru)) {
-		if (sc->may_thrash &&
-		    inactive_list_is_low(lruvec, is_file_lru(lru), memcg, true))
+		if (inactive_list_is_low(lruvec, is_file_lru(lru), memcg, true))
 			shrink_active_list(nr_to_scan, lruvec, sc, lru);
 		return 0;
 	}
-	if (sc->may_thrash ||
-	    !inactive_list_is_low(lruvec, is_file_lru(lru),
-				  sc->target_mem_cgroup, false))
-		return shrink_inactive_list(nr_to_scan, lruvec, sc, lru);
-	return 0;
+	return shrink_inactive_list(nr_to_scan, lruvec, sc, lru);
 }
 
 #ifdef CONFIG_MEMCG
@@ -2132,6 +2131,8 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg,
 	bool force_scan = false;
 	unsigned long ap, fp;
 	enum lru_list lru;
+	bool inactive_file_low = inactive_list_is_low(lruvec, true, memcg, false);
+	bool inactive_anon_low = inactive_list_is_low(lruvec, false, memcg, false);
 
 	/*
 	 * If the zone or memcg is small, nr[l] can be 0. This
@@ -2208,7 +2209,7 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg,
 	 * There is enough inactive page cache, do not reclaim
 	 * anything from the anonymous working set right now.
 	 */
-	if (!inactive_list_is_low(lruvec, true, memcg, false) &&
+	if (!inactive_file_low &&
 	    lruvec_lru_size(lruvec, LRU_INACTIVE_FILE) >> sc->priority) {
 		scan_balance = SCAN_FILE;
 		goto out;
 	}
@@ -2261,6 +2262,8 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg,
 	fraction[1] = fp;
 	denominator = ap + fp + 1;
 out:
+	sc->has_inactive = !inactive_file_low ||
+		((scan_balance != SCAN_FILE) && !inactive_anon_low);
 	*lru_pages = 0;
 	for_each_evictable_lru(lru) {
 		int file = is_file_lru(lru);
@@ -2270,6 +2273,10 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg,
 		size = lruvec_lru_size(lruvec, lru);
 		scan = size >> sc->priority;
 
+		if (!sc->may_shrink_active &&
+		    ((file && inactive_file_low) || (!file && inactive_anon_low)))
+			scan = 0;
+
 		if (!scan && force_scan)
 			scan = min(size, SWAP_CLUSTER_MAX);
 
@@ -2300,6 +2307,15 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg,
 		*lru_pages += size;
 		nr[lru] = scan;
 	}
+
+	/*
+	 * Even if we did not try to evict anon pages at all, we want to
+	 * rebalance the anon lru active/inactive ratio to maintain
+	 * enough reclaim candidates for the next reclaim cycle.
+	 */
+	if (scan_balance != SCAN_FILE && inactive_anon_low &&
+	    sc->may_shrink_active)
+		nr[LRU_ACTIVE_ANON] += SWAP_CLUSTER_MAX;
 }
 
 #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
@@ -2429,14 +2445,6 @@ static void shrink_zone_memcg(struct zone *zone, struct mem_cgroup *memcg,
 	blk_finish_plug(&plug);
 	sc->nr_reclaimed += nr_reclaimed;
 
-	/*
-	 * Even if we did not try to evict anon pages at all, we want to
-	 * rebalance the anon lru active/inactive ratio.
-	 */
-	if (inactive_list_is_low(lruvec, false, memcg, true))
-		shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
-				   sc, LRU_ACTIVE_ANON);
-
 	throttle_vm_writeout(sc->gfp_mask);
 }
 
@@ -2522,6 +2530,7 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc,
 	unsigned long nr_reclaimed, nr_scanned;
 	gfp_t slab_gfp = sc->gfp_mask;
 	bool slab_only = sc->slab_only;
+	bool retry;
 
 	/* Disable fs-related IO for direct reclaim */
 	if (!sc->target_mem_cgroup &&
@@ -2538,6 +2547,9 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc,
 		struct mem_cgroup *memcg;
 		struct reclaim_stat stat = {};
 
+		retry = false;
+		sc->has_inactive = false;
+
 		sc->stat = &stat;
 
 		nr_reclaimed = sc->nr_reclaimed;
@@ -2585,6 +2597,12 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc,
 			}
 		} while ((memcg = mem_cgroup_iter(root, memcg, &reclaim)));
 
+		if (!sc->has_inactive && !sc->may_shrink_active) {
+			sc->may_shrink_active = 1;
+			retry = true;
+			continue;
+		}
+
 		if (global_reclaim(sc)) {
 			/*
 			 * If reclaim is isolating dirty pages under writeback, it implies
@@ -2649,7 +2667,7 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc,
 					sc->nr_scanned - nr_scanned,
 					sc->nr_reclaimed - nr_reclaimed);
 
-	} while (should_continue_reclaim(zone, sc->nr_reclaimed - nr_reclaimed,
+	} while (retry || should_continue_reclaim(zone, sc->nr_reclaimed - nr_reclaimed,
 					 sc->nr_scanned - nr_scanned, sc));
 }
 
@@ -3225,6 +3243,9 @@ static void age_active_anon(struct zone *zone, struct scan_control *sc)
 	if (!total_swap_pages)
 		return;
 
+	if (!sc->may_shrink_active)
+		return;
+
 	memcg = mem_cgroup_iter(NULL, NULL, NULL);
 	do {
 		struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg);
--
2.19.2