Add a new memcg file, memory.cache.limit_in_bytes, used to limit page
cache usage in a cgroup. Pages that are neither anonymous nor
swap-backed are charged to a new 'cache' page_counter in addition to
the memory counter; when the cache limit is hit, reclaim is driven
without going to swap (MEM_CGROUP_RECLAIM_NOSWAP).
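
A minimal userspace sketch (not part of the patch) of how the new knob
could be set; the cgroup-v1 mount point and group name below are
assumptions, only memory.cache.limit_in_bytes itself comes from this
patch:

  #include <stdio.h>
  #include <stdlib.h>

  int main(void)
  {
          /* Hypothetical path; adjust to the local mount point and group. */
          const char *knob =
                  "/sys/fs/cgroup/memory/mygroup/memory.cache.limit_in_bytes";
          FILE *f = fopen(knob, "w");

          if (!f) {
                  perror("fopen");
                  return EXIT_FAILURE;
          }
          /* Limit page cache in this cgroup to 256 MiB. */
          fprintf(f, "%llu\n", 256ULL << 20);
          fclose(f);
          return EXIT_SUCCESS;
  }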

Signed-off-by: Andrey Ryabinin <aryabi...@virtuozzo.com>
---
 mm/memcontrol.c | 144 +++++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 126 insertions(+), 18 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index a165a221e87b..116b303319af 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -314,6 +314,8 @@ struct mem_cgroup {
         */
        struct page_counter dcache;
 
+       struct page_counter cache;
+
        /* beancounter-related stats */
        unsigned long long swap_max;
        atomic_long_t mem_failcnt;
@@ -502,6 +504,7 @@ enum res_type {
        _MEMSWAP,
        _OOM_TYPE,
        _KMEM,
+       _CACHE,
 };
 
 #define MEMFILE_PRIVATE(x, val)        ((x) << 16 | (val))
@@ -2771,7 +2774,7 @@ static int memcg_cpu_hotplug_callback(struct notifier_block *nb,
  * was bypassed to root_mem_cgroup, and -ENOMEM if the charge failed.
  */
 static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask, bool kmem_charge,
-                     unsigned int nr_pages)
+                     unsigned int nr_pages, bool cache_charge)
 {
        unsigned int batch = max(CHARGE_BATCH, nr_pages);
        int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
@@ -2786,12 +2789,22 @@ retry:
        flags = 0;
 
        if (consume_stock(memcg, nr_pages)) {
-               if (!kmem_charge)
-                       goto done;
-               if (!page_counter_try_charge(&memcg->kmem, nr_pages, &counter))
+               if (kmem_charge && page_counter_try_charge(
+                               &memcg->kmem, nr_pages, &counter)) {
+                       refill_stock(memcg, nr_pages);
+                       goto charge;
+               }
+
+               if (cache_charge && !page_counter_try_charge(
+                               &memcg->cache, nr_pages, &counter))
                        goto done;
+
+               refill_stock(memcg, nr_pages);
+               if (kmem_charge)
+                       page_counter_uncharge(&memcg->kmem, nr_pages);
        }
 
+charge:
        mem_over_limit = NULL;
        if (!page_counter_try_charge(&memcg->memory, batch, &counter)) {
                if (do_swap_account && page_counter_try_charge(
@@ -2804,15 +2817,29 @@ retry:
                mem_over_limit = mem_cgroup_from_counter(counter, memory);
 
        if (!mem_over_limit && kmem_charge) {
-               if (!page_counter_try_charge(&memcg->kmem, nr_pages, &counter))
+               if (page_counter_try_charge(&memcg->kmem, nr_pages, &counter)) {
+                       flags |= MEM_CGROUP_RECLAIM_KMEM;
+                       mem_over_limit = mem_cgroup_from_counter(counter, kmem);
+                       page_counter_uncharge(&memcg->memory, batch);
+                       if (do_swap_account)
+                               page_counter_uncharge(&memcg->memsw, batch);
+               }
+       }
+
+       if (!mem_over_limit && cache_charge) {
+               if (!page_counter_try_charge(&memcg->cache, nr_pages, &counter))
                        goto done_restock;
 
-               flags |= MEM_CGROUP_RECLAIM_KMEM;
-               mem_over_limit = mem_cgroup_from_counter(counter, kmem);
+               flags |= MEM_CGROUP_RECLAIM_NOSWAP;
+               mem_over_limit = mem_cgroup_from_counter(counter, cache);
                page_counter_uncharge(&memcg->memory, batch);
                if (do_swap_account)
                        page_counter_uncharge(&memcg->memsw, batch);
-       } else if (!mem_over_limit)
+               if (kmem_charge)
+                       page_counter_uncharge(&memcg->kmem, nr_pages);
+       }
+
+       if (!mem_over_limit)
                goto done_restock;
 
        if (batch > nr_pages) {
@@ -2898,12 +2925,15 @@ done:
        return 0;
 }
 
-static void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages)
+static void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages,
+                       bool cache_charge)
 {
        if (!mem_cgroup_is_root(memcg)) {
                page_counter_uncharge(&memcg->memory, nr_pages);
                if (do_swap_account)
                        page_counter_uncharge(&memcg->memsw, nr_pages);
+               if (cache_charge)
+                       page_counter_uncharge(&memcg->cache, nr_pages);
        }
 }
 
@@ -3068,7 +3098,7 @@ int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp,
 {
        int ret = 0;
 
-       ret = try_charge(memcg, gfp, true, nr_pages);
+       ret = try_charge(memcg, gfp, true, nr_pages, false);
        if (ret == -EINTR)  {
                /*
                 * try_charge() chose to bypass to root due to OOM kill or
@@ -4327,6 +4357,9 @@ static ssize_t mem_cgroup_read(struct cgroup *cont, struct cftype *cft,
        case _KMEM:
                counter = &memcg->kmem;
                break;
+       case _CACHE:
+               counter = &memcg->cache;
+               break;
        default:
                BUG();
        }
@@ -4479,6 +4512,57 @@ static int memcg_update_kmem_limit(struct mem_cgroup *memcg,
 }
 #endif /* CONFIG_MEMCG_KMEM */
 
+static int memcg_update_cache_limit(struct mem_cgroup *memcg,
+                                  unsigned long limit)
+{
+       unsigned long curusage;
+       unsigned long oldusage;
+       bool enlarge = false;
+       int retry_count;
+       int ret;
+
+       /*
+        * For keeping hierarchical_reclaim simple, how long we should retry
+        * depends on the caller. We set our retry count to be a function
+        * of the number of children we should visit in this loop.
+        */
+       retry_count = MEM_CGROUP_RECLAIM_RETRIES *
+                     mem_cgroup_count_children(memcg);
+
+       oldusage = page_counter_read(&memcg->cache);
+
+       do {
+               if (signal_pending(current)) {
+                       ret = -EINTR;
+                       break;
+               }
+               mutex_lock(&memcg_limit_mutex);
+
+               if (limit > memcg->cache.limit)
+                       enlarge = true;
+
+               ret = page_counter_limit(&memcg->cache, limit);
+               mutex_unlock(&memcg_limit_mutex);
+
+               if (!ret)
+                       break;
+
+               mem_cgroup_reclaim(memcg, GFP_KERNEL,
+                                  MEM_CGROUP_RECLAIM_NOSWAP);
+               curusage = page_counter_read(&memcg->cache);
+               /* Usage is reduced ? */
+               if (curusage >= oldusage)
+                       retry_count--;
+               else
+                       oldusage = curusage;
+       } while (retry_count);
+
+       if (!ret && enlarge)
+               memcg_oom_recover(memcg);
+
+       return ret;
+}
+
 /*
  * The user of this function is...
  * RES_LIMIT.
@@ -4510,6 +4594,9 @@ static int mem_cgroup_write(struct cgroup *cont, struct cftype *cft,
                case _KMEM:
                        ret = memcg_update_kmem_limit(memcg, nr_pages);
                        break;
+               case _CACHE:
+                       ret = memcg_update_cache_limit(memcg, nr_pages);
+                       break;
                }
                break;
        case RES_SOFT_LIMIT:
@@ -4650,6 +4737,9 @@ static int mem_cgroup_reset(struct cgroup *cont, unsigned int event)
        case _KMEM:
                counter = &memcg->kmem;
                break;
+       case _CACHE:
+               counter = &memcg->cache;
+               break;
        default:
                BUG();
        }
@@ -5791,6 +5881,12 @@ static struct cftype mem_cgroup_files[] = {
                .register_event = vmpressure_register_event,
                .unregister_event = vmpressure_unregister_event,
        },
+       {
+               .name = "cache.limit_in_bytes",
+               .private = MEMFILE_PRIVATE(_CACHE, RES_LIMIT),
+               .write_string = mem_cgroup_write,
+               .read = mem_cgroup_read,
+       },
 #ifdef CONFIG_NUMA
        {
                .name = "numa_stat",
@@ -6044,6 +6140,7 @@ mem_cgroup_css_alloc(struct cgroup *cont)
                page_counter_init(&memcg->memsw, NULL);
                page_counter_init(&memcg->kmem, NULL);
                page_counter_init(&memcg->dcache, NULL);
+               page_counter_init(&memcg->cache, NULL);
        }
 
        memcg->last_scanned_node = MAX_NUMNODES;
@@ -6091,6 +6188,7 @@ mem_cgroup_css_online(struct cgroup *cont)
                page_counter_init(&memcg->memsw, &parent->memsw);
                page_counter_init(&memcg->kmem, &parent->kmem);
                page_counter_init(&memcg->dcache, &parent->dcache);
+               page_counter_init(&memcg->cache, &parent->cache);
 
                /*
                 * No need to take a reference to the parent because cgroup
@@ -6103,6 +6201,8 @@ mem_cgroup_css_online(struct cgroup *cont)
                page_counter_init(&memcg->memsw, NULL);
                page_counter_init(&memcg->kmem, NULL);
                page_counter_init(&memcg->dcache, NULL);
+               page_counter_init(&memcg->cache, NULL);
+
                /*
                 * Deeper hierachy with use_hierarchy == false doesn't make
                 * much sense so let cgroup subsystem know about this
@@ -6233,19 +6333,19 @@ static int mem_cgroup_do_precharge(unsigned long count)
        }
 
        /* Try a single bulk charge without reclaim first */
-       ret = try_charge(mc.to, GFP_KERNEL & ~__GFP_WAIT, false, count);
+       ret = try_charge(mc.to, GFP_KERNEL & ~__GFP_WAIT, false, count, false);
        if (!ret) {
                mc.precharge += count;
                return ret;
        }
        if (ret == -EINTR) {
-               cancel_charge(root_mem_cgroup, count);
+               cancel_charge(root_mem_cgroup, count, false);
                return ret;
        }
 
        /* Try charges one by one with reclaim */
        while (count--) {
-               ret = try_charge(mc.to, GFP_KERNEL & ~__GFP_NORETRY, false, 1);
+               ret = try_charge(mc.to, GFP_KERNEL & ~__GFP_NORETRY, false, 1, false);
                /*
                 * In case of failure, any residual charges against
                 * mc.to will be dropped by mem_cgroup_clear_mc()
@@ -6253,7 +6353,7 @@ static int mem_cgroup_do_precharge(unsigned long count)
                 * bypassed to root right away or they'll be lost.
                 */
                if (ret == -EINTR)
-                       cancel_charge(root_mem_cgroup, 1);
+                       cancel_charge(root_mem_cgroup, 1, false);
                if (ret)
                        return ret;
                mc.precharge++;
@@ -6519,7 +6619,7 @@ static void __mem_cgroup_clear_mc(void)
 
        /* we must uncharge all the leftover precharges from mc.to */
        if (mc.precharge) {
-               cancel_charge(mc.to, mc.precharge);
+               cancel_charge(mc.to, mc.precharge, false);
                mc.precharge = 0;
        }
        /*
@@ -6527,7 +6627,7 @@ static void __mem_cgroup_clear_mc(void)
         * we must uncharge here.
         */
        if (mc.moved_charge) {
-               cancel_charge(mc.from, mc.moved_charge);
+               cancel_charge(mc.from, mc.moved_charge, false);
                mc.moved_charge = 0;
        }
        /* we must fixup refcnts and charges */
@@ -6947,6 +7047,7 @@ int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
        struct mem_cgroup *memcg = NULL;
        unsigned int nr_pages = 1;
        int ret = 0;
+       bool cache_charge;
 
        if (mem_cgroup_disabled())
                goto out;
@@ -6969,12 +7070,14 @@ int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
                VM_BUG_ON_PAGE(!PageTransHuge(page), page);
        }
 
+       cache_charge = !PageAnon(page) && !PageSwapBacked(page);
+
        if (do_swap_account && PageSwapCache(page))
                memcg = try_get_mem_cgroup_from_page(page);
        if (!memcg)
                memcg = get_mem_cgroup_from_mm(mm);
 
-       ret = try_charge(memcg, gfp_mask, false, nr_pages);
+       ret = try_charge(memcg, gfp_mask, false, nr_pages, cache_charge);
 
        css_put(&memcg->css);
 
@@ -7049,6 +7152,7 @@ void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg,
 void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg)
 {
        unsigned int nr_pages = 1;
+       bool cache_charge;
 
        if (mem_cgroup_disabled())
                return;
@@ -7065,7 +7169,9 @@ void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg)
                VM_BUG_ON_PAGE(!PageTransHuge(page), page);
        }
 
-       cancel_charge(memcg, nr_pages);
+       cache_charge = !PageKmemcg(page) && !PageAnon(page)
+               && !PageSwapBacked(page);
+       cancel_charge(memcg, nr_pages, cache_charge);
 }
 
 static void uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout,
@@ -7083,6 +7189,8 @@ static void uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout,
                        page_counter_uncharge(&memcg->memsw, nr_memsw + nr_kmem);
                if (nr_kmem)
                        page_counter_uncharge(&memcg->kmem, nr_kmem);
+               if (nr_file)
+                       page_counter_uncharge(&memcg->cache, nr_file - nr_shmem);
 
                memcg_oom_recover(memcg);
        }
-- 
2.13.6
