On 01/09/2018 07:58 PM, Andrey Ryabinin wrote:
> mem_cgroup_resize_[memsw]_limit() tries to free only 32 (SWAP_CLUSTER_MAX)
> pages on each iteration. This makes it practically impossible to decrease
> the limit of a memory cgroup. Tasks can easily allocate back 32 pages, so
> we can't reduce memory usage, and once retry_count reaches zero we return
> -EBUSY.
> 
> Easy to reproduce the problem by running the following commands:
> 
>   mkdir /sys/fs/cgroup/memory/test
>   echo $$ >> /sys/fs/cgroup/memory/test/tasks
>   cat big_file > /dev/null &
>   sleep 1 && echo $((100*1024*1024)) > /sys/fs/cgroup/memory/test/memory.limit_in_bytes
>   -bash: echo: write error: Device or resource busy
> 
> Instead of relying on retry_count, keep retrying the reclaim until
> the desired limit is reached or fail if the reclaim doesn't make
> any progress or a signal is pending.
> 
> Signed-off-by: Andrey Ryabinin <aryabi...@virtuozzo.com>
> ---
> 
> Changes since v2:
>  - Changelog wording per mhocko@
> 


Ugh, sorry, I forgot to +Cc Michal this time.

The changelog is the only thing that changed between v2 and v3.

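For anyone reading the diff below out of context, the resize loop after this
patch boils down to roughly the following (a sketch assembled from the hunks
below; the limit-setting step done under memcg_limit_mutex is elided here):

	do {
		if (signal_pending(current)) {
			ret = -EINTR;
			break;
		}

		/* try to install the new limit; on success ret == 0 (elided) */
		if (!ret)
			break;

		/* reclaim the whole overage at once instead of 32 pages */
		usage = page_counter_read(&memcg->memory);
		if (!try_to_free_mem_cgroup_pages(memcg, usage - limit,
						  GFP_KERNEL, true)) {
			ret = -EBUSY;
			break;
		}
	} while (true);

mem_cgroup_resize_memsw_limit() follows the same pattern against
memcg->memsw, with may_swap == false.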

>  mm/memcontrol.c | 70 +++++++++++++--------------------------------------------
>  1 file changed, 16 insertions(+), 54 deletions(-)
> 
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index f40b5ad3f959..0d26db9a665d 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -1176,20 +1176,6 @@ void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p)
>  }
>  
>  /*
> - * This function returns the number of memcg under hierarchy tree. Returns
> - * 1(self count) if no children.
> - */
> -static int mem_cgroup_count_children(struct mem_cgroup *memcg)
> -{
> -     int num = 0;
> -     struct mem_cgroup *iter;
> -
> -     for_each_mem_cgroup_tree(iter, memcg)
> -             num++;
> -     return num;
> -}
> -
> -/*
>   * Return the memory (and swap, if configured) limit for a memcg.
>   */
>  unsigned long mem_cgroup_get_limit(struct mem_cgroup *memcg)
> @@ -2462,22 +2448,10 @@ static DEFINE_MUTEX(memcg_limit_mutex);
>  static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
>                                  unsigned long limit)
>  {
> -     unsigned long curusage;
> -     unsigned long oldusage;
> +     unsigned long usage;
>       bool enlarge = false;
> -     int retry_count;
>       int ret;
>  
> -     /*
> -      * For keeping hierarchical_reclaim simple, how long we should retry
> -      * is depends on callers. We set our retry-count to be function
> -      * of # of children which we should visit in this loop.
> -      */
> -     retry_count = MEM_CGROUP_RECLAIM_RETRIES *
> -                   mem_cgroup_count_children(memcg);
> -
> -     oldusage = page_counter_read(&memcg->memory);
> -
>       do {
>               if (signal_pending(current)) {
>                       ret = -EINTR;
> @@ -2498,15 +2472,13 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
>               if (!ret)
>                       break;
>  
> -             try_to_free_mem_cgroup_pages(memcg, 1, GFP_KERNEL, true);
> -
> -             curusage = page_counter_read(&memcg->memory);
> -             /* Usage is reduced ? */
> -             if (curusage >= oldusage)
> -                     retry_count--;
> -             else
> -                     oldusage = curusage;
> -     } while (retry_count);
> +             usage = page_counter_read(&memcg->memory);
> +             if (!try_to_free_mem_cgroup_pages(memcg, usage - limit,
> +                                     GFP_KERNEL, true)) {
> +                     ret = -EBUSY;
> +                     break;
> +             }
> +     } while (true);
>  
>       if (!ret && enlarge)
>               memcg_oom_recover(memcg);
> @@ -2517,18 +2489,10 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
>  static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
>                                        unsigned long limit)
>  {
> -     unsigned long curusage;
> -     unsigned long oldusage;
> +     unsigned long usage;
>       bool enlarge = false;
> -     int retry_count;
>       int ret;
>  
> -     /* see mem_cgroup_resize_res_limit */
> -     retry_count = MEM_CGROUP_RECLAIM_RETRIES *
> -                   mem_cgroup_count_children(memcg);
> -
> -     oldusage = page_counter_read(&memcg->memsw);
> -
>       do {
>               if (signal_pending(current)) {
>                       ret = -EINTR;
> @@ -2549,15 +2513,13 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
>               if (!ret)
>                       break;
>  
> -             try_to_free_mem_cgroup_pages(memcg, 1, GFP_KERNEL, false);
> -
> -             curusage = page_counter_read(&memcg->memsw);
> -             /* Usage is reduced ? */
> -             if (curusage >= oldusage)
> -                     retry_count--;
> -             else
> -                     oldusage = curusage;
> -     } while (retry_count);
> +             usage = page_counter_read(&memcg->memsw);
> +             if (!try_to_free_mem_cgroup_pages(memcg, usage - limit,
> +                                     GFP_KERNEL, false)) {
> +                     ret = -EBUSY;
> +                     break;
> +             }
> +     } while (true);
>  
>       if (!ret && enlarge)
>               memcg_oom_recover(memcg);
> 
