On Thu, May 28, 2020 at 04:25:06PM -0700, Roman Gushchin wrote:
> Percpu memory can represent a noticeable chunk of the total
> memory consumption, especially on big machines with many CPUs.
> Let's track percpu memory usage for each memcg and display
> it in memory.stat.
> 
> A percpu allocation is usually scattered over multiple pages
> (and nodes), and can be significantly smaller than a page.
> So let's add a byte-sized counter on the memcg level:
> MEMCG_PERCPU_B. Byte-sized vmstat infra created for slabs
> can be perfectly reused for percpu case.
> 
> Signed-off-by: Roman Gushchin <g...@fb.com>
> ---
>  Documentation/admin-guide/cgroup-v2.rst |  4 ++++
>  include/linux/memcontrol.h              |  8 ++++++++
>  mm/memcontrol.c                         |  4 +++-
>  mm/percpu.c                             | 10 ++++++++++
>  4 files changed, 25 insertions(+), 1 deletion(-)
> 
> diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
> index fed4e1d2a343..aa8cb6dadadc 100644
> --- a/Documentation/admin-guide/cgroup-v2.rst
> +++ b/Documentation/admin-guide/cgroup-v2.rst
> @@ -1276,6 +1276,10 @@ PAGE_SIZE multiple when read back.
>               Amount of memory used for storing in-kernel data
>               structures.
>  
> +       percpu
> +             Amount of memory used for storing per-cpu kernel
> +             data structures.
> +
>         sock
>               Amount of memory used in network transmission buffers
>  
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index 7a84d9164449..f62a95d472f7 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -32,11 +32,19 @@ struct kmem_cache;
>  enum memcg_stat_item {
>       MEMCG_SWAP = NR_VM_NODE_STAT_ITEMS,
>       MEMCG_SOCK,
> +     MEMCG_PERCPU_B,
>       /* XXX: why are these zone and not node counters? */
>       MEMCG_KERNEL_STACK_KB,
>       MEMCG_NR_STAT,
>  };
>  
> +static __always_inline bool memcg_stat_item_in_bytes(enum memcg_stat_item item)
> +{
> +     if (item == MEMCG_PERCPU_B)
> +             return true;
> +     return vmstat_item_in_bytes(item);
> +}
> +
>  enum memcg_memory_event {
>       MEMCG_LOW,
>       MEMCG_HIGH,
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 7bc3fd196210..5007d1585a4a 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -783,7 +783,7 @@ void __mod_memcg_state(struct mem_cgroup *memcg, int idx, int val)
>       if (mem_cgroup_disabled())
>               return;
>  
> -     if (vmstat_item_in_bytes(idx))
> +     if (memcg_stat_item_in_bytes(idx))
>               threshold <<= PAGE_SHIFT;
>  
>       x = val + __this_cpu_read(memcg->vmstats_percpu->stat[idx]);
> @@ -1490,6 +1490,8 @@ static char *memory_stat_format(struct mem_cgroup *memcg)
>       seq_buf_printf(&s, "slab %llu\n",
>                      (u64)(memcg_page_state(memcg, NR_SLAB_RECLAIMABLE_B) +
>                            memcg_page_state(memcg, NR_SLAB_UNRECLAIMABLE_B)));
> +     seq_buf_printf(&s, "percpu %llu\n",
> +                    (u64)memcg_page_state(memcg, MEMCG_PERCPU_B));
>       seq_buf_printf(&s, "sock %llu\n",
>                      (u64)memcg_page_state(memcg, MEMCG_SOCK) *
>                      PAGE_SIZE);
> diff --git a/mm/percpu.c b/mm/percpu.c
> index 85f5755c9114..b4b3e9c8a6d1 100644
> --- a/mm/percpu.c
> +++ b/mm/percpu.c
> @@ -1608,6 +1608,11 @@ static void pcpu_memcg_post_alloc_hook(struct obj_cgroup *objcg,
>  
>       if (chunk) {
>               chunk->obj_cgroups[off >> PCPU_MIN_ALLOC_SHIFT] = objcg;
> +
> +             rcu_read_lock();
> +             mod_memcg_state(obj_cgroup_memcg(objcg), MEMCG_PERCPU_B,
> +                             size * num_possible_cpus());
> +             rcu_read_unlock();
>       } else {
>               obj_cgroup_uncharge(objcg, size * num_possible_cpus());
>               obj_cgroup_put(objcg);
> @@ -1626,6 +1631,11 @@ static void pcpu_memcg_free_hook(struct pcpu_chunk *chunk, int off, size_t size)
>  
>       obj_cgroup_uncharge(objcg, size * num_possible_cpus());
>  
> +     rcu_read_lock();
> +     mod_memcg_state(obj_cgroup_memcg(objcg), MEMCG_PERCPU_B,
> +                     -(size * num_possible_cpus()));
> +     rcu_read_unlock();
> +
>       obj_cgroup_put(objcg);
>  }
>  
> -- 
> 2.25.4
> 

Acked-by: Dennis Zhou <den...@kernel.org>

Thanks,
Dennis

Reply via email to