Add a new memcg file, memory.cache.limit_in_bytes, which can be used to limit page cache usage in a cgroup.
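
Pages that are neither anonymous nor swap-backed are charged to the new
"cache" page counter in try_charge(). When that counter hits its limit,
reclaim is retried without touching swap (MEM_CGROUP_RECLAIM_NOSWAP), and
the cancel/uncharge paths drop the cache charge for such pages as well.

The new file is a byte-valued limit like the other *.limit_in_bytes knobs.
A minimal userspace sketch of setting it is below; the mount point
/sys/fs/cgroup/memory, the cgroup name "test" and the 512M value are only
illustrative assumptions, not something defined by this patch:

	#include <stdio.h>

	int main(void)
	{
		/*
		 * Assumes the memory controller is mounted at
		 * /sys/fs/cgroup/memory and that a child cgroup "test"
		 * already exists.
		 */
		FILE *f = fopen("/sys/fs/cgroup/memory/test/"
				"memory.cache.limit_in_bytes", "w");

		if (!f)
			return 1;

		/* Limit the group's page cache to 512MB (value in bytes). */
		fprintf(f, "%llu\n", 512ULL << 20);

		return fclose(f) ? 1 : 0;
	}
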
Signed-off-by: Andrey Ryabinin <aryabi...@virtuozzo.com>
---
 mm/memcontrol.c | 144 +++++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 126 insertions(+), 18 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index a165a221e87b..116b303319af 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -314,6 +314,8 @@ struct mem_cgroup {
 	 */
 	struct page_counter dcache;
 
+	struct page_counter cache;
+
 	/* beancounter-related stats */
 	unsigned long long swap_max;
 	atomic_long_t mem_failcnt;
@@ -502,6 +504,7 @@ enum res_type {
 	_MEMSWAP,
 	_OOM_TYPE,
 	_KMEM,
+	_CACHE,
 };
 
 #define MEMFILE_PRIVATE(x, val)	((x) << 16 | (val))
@@ -2771,7 +2774,7 @@ static int memcg_cpu_hotplug_callback(struct notifier_block *nb,
  * was bypassed to root_mem_cgroup, and -ENOMEM if the charge failed.
  */
 static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask, bool kmem_charge,
-		      unsigned int nr_pages)
+		      unsigned int nr_pages, bool cache_charge)
 {
 	unsigned int batch = max(CHARGE_BATCH, nr_pages);
 	int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
@@ -2786,12 +2789,22 @@ retry:
 	flags = 0;
 
 	if (consume_stock(memcg, nr_pages)) {
-		if (!kmem_charge)
-			goto done;
-		if (!page_counter_try_charge(&memcg->kmem, nr_pages, &counter))
+		if (kmem_charge && page_counter_try_charge(
+				&memcg->kmem, nr_pages, &counter)) {
+			refill_stock(memcg, nr_pages);
+			goto charge;
+		}
+
+		if (cache_charge && !page_counter_try_charge(
+				&memcg->cache, nr_pages, &counter))
 			goto done;
+
+		refill_stock(memcg, nr_pages);
+		if (kmem_charge)
+			page_counter_uncharge(&memcg->kmem, nr_pages);
 	}
 
+charge:
 	mem_over_limit = NULL;
 	if (!page_counter_try_charge(&memcg->memory, batch, &counter)) {
 		if (do_swap_account && page_counter_try_charge(
@@ -2804,15 +2817,29 @@ retry:
 	mem_over_limit = mem_cgroup_from_counter(counter, memory);
 
 	if (!mem_over_limit && kmem_charge) {
-		if (!page_counter_try_charge(&memcg->kmem, nr_pages, &counter))
+		if (page_counter_try_charge(&memcg->kmem, nr_pages, &counter)) {
+			flags |= MEM_CGROUP_RECLAIM_KMEM;
+			mem_over_limit = mem_cgroup_from_counter(counter, kmem);
+			page_counter_uncharge(&memcg->memory, batch);
+			if (do_swap_account)
+				page_counter_uncharge(&memcg->memsw, batch);
+		}
+	}
+
+	if (!mem_over_limit && cache_charge) {
+		if (!page_counter_try_charge(&memcg->cache, nr_pages, &counter))
 			goto done_restock;
 
-		flags |= MEM_CGROUP_RECLAIM_KMEM;
-		mem_over_limit = mem_cgroup_from_counter(counter, kmem);
+		flags |= MEM_CGROUP_RECLAIM_NOSWAP;
+		mem_over_limit = mem_cgroup_from_counter(counter, cache);
 		page_counter_uncharge(&memcg->memory, batch);
 		if (do_swap_account)
 			page_counter_uncharge(&memcg->memsw, batch);
-	} else if (!mem_over_limit)
+		if (kmem_charge)
+			page_counter_uncharge(&memcg->kmem, batch);
+	}
+
+	if (!mem_over_limit)
 		goto done_restock;
 
 	if (batch > nr_pages) {
@@ -2898,12 +2925,15 @@ done:
 	return 0;
 }
 
-static void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages)
+static void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages,
+			  bool cache_charge)
 {
 	if (!mem_cgroup_is_root(memcg)) {
 		page_counter_uncharge(&memcg->memory, nr_pages);
 		if (do_swap_account)
 			page_counter_uncharge(&memcg->memsw, nr_pages);
+		if (cache_charge)
+			page_counter_uncharge(&memcg->cache, nr_pages);
 	}
 }
 
@@ -3068,7 +3098,7 @@ int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp,
 {
 	int ret = 0;
 
-	ret = try_charge(memcg, gfp, true, nr_pages);
+	ret = try_charge(memcg, gfp, true, nr_pages, false);
 	if (ret == -EINTR) {
 		/*
 		 * try_charge() chose to bypass to root due to OOM kill or
@@ -4327,6 +4357,9 @@ static ssize_t mem_cgroup_read(struct cgroup *cont, struct cftype *cft,
 	case _KMEM:
 		counter = &memcg->kmem;
 		break;
+	case _CACHE:
+		counter = &memcg->cache;
+		break;
 	default:
 		BUG();
 	}
@@ -4479,6 +4512,57 @@ static int memcg_update_kmem_limit(struct mem_cgroup *memcg,
 }
 #endif /* CONFIG_MEMCG_KMEM */
 
+static int memcg_update_cache_limit(struct mem_cgroup *memcg,
+				    unsigned long limit)
+{
+	unsigned long curusage;
+	unsigned long oldusage;
+	bool enlarge = false;
+	int retry_count;
+	int ret;
+
+	/*
+	 * For keeping hierarchical_reclaim simple, how long we should retry
+	 * is depends on callers. We set our retry-count to be function
+	 * of # of children which we should visit in this loop.
+	 */
+	retry_count = MEM_CGROUP_RECLAIM_RETRIES *
+		      mem_cgroup_count_children(memcg);
+
+	oldusage = page_counter_read(&memcg->cache);
+
+	do {
+		if (signal_pending(current)) {
+			ret = -EINTR;
+			break;
+		}
+		mutex_lock(&memcg_limit_mutex);
+
+		if (limit > memcg->cache.limit)
+			enlarge = true;
+
+		ret = page_counter_limit(&memcg->cache, limit);
+		mutex_unlock(&memcg_limit_mutex);
+
+		if (!ret)
+			break;
+
+		mem_cgroup_reclaim(memcg, GFP_KERNEL,
+				   MEM_CGROUP_RECLAIM_NOSWAP);
+		curusage = page_counter_read(&memcg->cache);
+		/* Usage is reduced ? */
+		if (curusage >= oldusage)
+			retry_count--;
+		else
+			oldusage = curusage;
+	} while (retry_count);
+
+	if (!ret && enlarge)
+		memcg_oom_recover(memcg);
+
+	return ret;
+}
+
 /*
  * The user of this function is...
  * RES_LIMIT.
@@ -4510,6 +4594,9 @@ static int mem_cgroup_write(struct cgroup *cont, struct cftype *cft,
 		case _KMEM:
 			ret = memcg_update_kmem_limit(memcg, nr_pages);
 			break;
+		case _CACHE:
+			ret = memcg_update_cache_limit(memcg, nr_pages);
+			break;
 		}
 		break;
 	case RES_SOFT_LIMIT:
@@ -4650,6 +4737,9 @@ static int mem_cgroup_reset(struct cgroup *cont, unsigned int event)
 	case _KMEM:
 		counter = &memcg->kmem;
 		break;
+	case _CACHE:
+		counter = &memcg->cache;
+		break;
 	default:
 		BUG();
 	}
@@ -5791,6 +5881,12 @@ static struct cftype mem_cgroup_files[] = {
 		.register_event = vmpressure_register_event,
 		.unregister_event = vmpressure_unregister_event,
 	},
+	{
+		.name = "cache.limit_in_bytes",
+		.private = MEMFILE_PRIVATE(_CACHE, RES_LIMIT),
+		.write_string = mem_cgroup_write,
+		.read = mem_cgroup_read,
+	},
 #ifdef CONFIG_NUMA
 	{
 		.name = "numa_stat",
@@ -6044,6 +6140,7 @@ mem_cgroup_css_alloc(struct cgroup *cont)
 		page_counter_init(&memcg->memsw, NULL);
 		page_counter_init(&memcg->kmem, NULL);
 		page_counter_init(&memcg->dcache, NULL);
+		page_counter_init(&memcg->cache, NULL);
 	}
 
 	memcg->last_scanned_node = MAX_NUMNODES;
@@ -6091,6 +6188,7 @@ mem_cgroup_css_online(struct cgroup *cont)
 		page_counter_init(&memcg->memsw, &parent->memsw);
 		page_counter_init(&memcg->kmem, &parent->kmem);
 		page_counter_init(&memcg->dcache, &parent->dcache);
+		page_counter_init(&memcg->cache, &parent->cache);
 
 		/*
 		 * No need to take a reference to the parent because cgroup
@@ -6103,6 +6201,8 @@ mem_cgroup_css_online(struct cgroup *cont)
 		page_counter_init(&memcg->memsw, NULL);
 		page_counter_init(&memcg->kmem, NULL);
 		page_counter_init(&memcg->dcache, NULL);
+		page_counter_init(&memcg->cache, NULL);
+
 		/*
 		 * Deeper hierachy with use_hierarchy == false doesn't make
 		 * much sense so let cgroup subsystem know about this
@@ -6233,19 +6333,19 @@ static int mem_cgroup_do_precharge(unsigned long count)
 	}
 
 	/* Try a single bulk charge without reclaim first */
-	ret = try_charge(mc.to, GFP_KERNEL & ~__GFP_WAIT, false, count);
+	ret = try_charge(mc.to, GFP_KERNEL & ~__GFP_WAIT, false, count, false);
 	if (!ret) {
 		mc.precharge += count;
 		return ret;
 	}
 	if (ret == -EINTR) {
-		cancel_charge(root_mem_cgroup, count);
+		cancel_charge(root_mem_cgroup, count, false);
 		return ret;
 	}
 
 	/* Try charges one by one with reclaim */
 	while (count--) {
-		ret = try_charge(mc.to, GFP_KERNEL & ~__GFP_NORETRY, false, 1);
+		ret = try_charge(mc.to, GFP_KERNEL & ~__GFP_NORETRY, false, 1, false);
 		/*
 		 * In case of failure, any residual charges against
 		 * mc.to will be dropped by mem_cgroup_clear_mc()
@@ -6253,7 +6353,7 @@ static int mem_cgroup_do_precharge(unsigned long count)
 		 * bypassed to root right away or they'll be lost.
 		 */
 		if (ret == -EINTR)
-			cancel_charge(root_mem_cgroup, 1);
+			cancel_charge(root_mem_cgroup, 1, false);
 		if (ret)
 			return ret;
 		mc.precharge++;
@@ -6519,7 +6619,7 @@ static void __mem_cgroup_clear_mc(void)
 
 	/* we must uncharge all the leftover precharges from mc.to */
 	if (mc.precharge) {
-		cancel_charge(mc.to, mc.precharge);
+		cancel_charge(mc.to, mc.precharge, false);
 		mc.precharge = 0;
 	}
 	/*
@@ -6527,7 +6627,7 @@ static void __mem_cgroup_clear_mc(void)
 	 * we must uncharge here.
 	 */
 	if (mc.moved_charge) {
-		cancel_charge(mc.from, mc.moved_charge);
+		cancel_charge(mc.from, mc.moved_charge, false);
 		mc.moved_charge = 0;
 	}
 	/* we must fixup refcnts and charges */
@@ -6947,6 +7047,7 @@ int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
 	struct mem_cgroup *memcg = NULL;
 	unsigned int nr_pages = 1;
 	int ret = 0;
+	bool cache_charge;
 
 	if (mem_cgroup_disabled())
 		goto out;
@@ -6969,12 +7070,14 @@ int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
 		VM_BUG_ON_PAGE(!PageTransHuge(page), page);
 	}
 
+	cache_charge = !PageAnon(page) && !PageSwapBacked(page);
+
 	if (do_swap_account && PageSwapCache(page))
 		memcg = try_get_mem_cgroup_from_page(page);
 	if (!memcg)
 		memcg = get_mem_cgroup_from_mm(mm);
 
-	ret = try_charge(memcg, gfp_mask, false, nr_pages);
+	ret = try_charge(memcg, gfp_mask, false, nr_pages, cache_charge);
 
 	css_put(&memcg->css);
 
@@ -7049,6 +7152,7 @@ void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg,
 void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg)
 {
 	unsigned int nr_pages = 1;
+	bool cache_charge;
 
 	if (mem_cgroup_disabled())
 		return;
@@ -7065,7 +7169,9 @@ void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg)
 		VM_BUG_ON_PAGE(!PageTransHuge(page), page);
 	}
 
-	cancel_charge(memcg, nr_pages);
+	cache_charge = !PageKmemcg(page) && !PageAnon(page)
+			&& !PageSwapBacked(page);
+	cancel_charge(memcg, nr_pages, cache_charge);
 }
 
 static void uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout,
@@ -7083,6 +7189,8 @@ static void uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout,
 			page_counter_uncharge(&memcg->memsw, nr_memsw + nr_kmem);
 		if (nr_kmem)
 			page_counter_uncharge(&memcg->kmem, nr_kmem);
+		if (nr_file)
+			page_counter_uncharge(&memcg->cache, nr_file - nr_shmem);
 		memcg_oom_recover(memcg);
 	}
-- 
2.13.6