Re: [RFC PATCH 1/3] memcg: add eBPF struct ops support for memory charging

2025-11-19 Thread Tejun Heo
Hello, Hui.

On Wed, Nov 19, 2025 at 09:34:06AM +0800, Hui Zhu wrote:
> Handlers can inspect this context and modify certain fields
> (e.g., nr_pages) to adjust reclamation behavior. The design
> enforces single active handler to avoid conflicts.

Other folks would know a lot better how this should hook into memcg, but
given that this is a cgroup feature, this should allow hierarchical
delegation. It probably would be a good idea to aling it with the bpf OOM
handler support that Roman is working on.

Thanks.

-- 
tejun



Re: [RFC PATCH 1/3] memcg: add eBPF struct ops support for memory charging

2025-11-18 Thread bot+bpf-ci
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 4deda3362..104c9e930 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -68,6 +68,7 @@
>  #include 
>  #include "slab.h"
>  #include "memcontrol-v1.h"
> +#include "memcontrol_bpf.h"
>
>  #include 
>
> @@ -2301,13 +2302,14 @@ static int try_charge_memcg(struct mem_cgroup *memcg, 
> gfp_t gfp_mask,
>   int nr_retries = MAX_RECLAIM_RETRIES;
>   struct mem_cgroup *mem_over_limit;
>   struct page_counter *counter;
> - unsigned long nr_reclaimed;
> + unsigned long nr_reclaime, nr_reclaimed;
>   bool passed_oom = false;
>   unsigned int reclaim_options = MEMCG_RECLAIM_MAY_SWAP;
>   bool drained = false;
>   bool raised_max_event = false;
>   unsigned long pflags;
>   bool allow_spinning = gfpflags_allow_spinning(gfp_mask);
> + bool charge_done = false;
>
>  retry:
>   if (consume_stock(memcg, nr_pages))
> @@ -2320,20 +2322,30 @@ static int try_charge_memcg(struct mem_cgroup *memcg, 
> gfp_t gfp_mask,
>   if (!do_memsw_account() ||
>   page_counter_try_charge(&memcg->memsw, batch, &counter)) {
>   if (page_counter_try_charge(&memcg->memory, batch, &counter))
> - goto done_restock;
> - if (do_memsw_account())
> - page_counter_uncharge(&memcg->memsw, batch);
> - mem_over_limit = mem_cgroup_from_counter(counter, memory);
> + charge_done = true;
> + else {
> + if (do_memsw_account())
> + page_counter_uncharge(&memcg->memsw, batch);
> + mem_over_limit = mem_cgroup_from_counter(counter, 
> memory);
> + }
>   } else {
>   mem_over_limit = mem_cgroup_from_counter(counter, memsw);
>   reclaim_options &= ~MEMCG_RECLAIM_MAY_SWAP;
>   }
>
> - if (batch > nr_pages) {
> + if (!charge_done && batch > nr_pages) {
>   batch = nr_pages;
>   goto retry;
>   }
>
> + nr_reclaime = bpf_try_charge_memcg(memcg, gfp_mask, nr_pages,
> +mem_over_limit,
> +reclaim_options,
> +charge_done);
   

When charge succeeds (charge_done = true), mem_over_limit is never
initialized but gets passed to bpf_try_charge_memcg(). Looking at
memcontrol_bpf.h, the BPF verifier allows read access to all fields
in struct try_charge_memcg via bpf_tracing_btf_ctx_access() (only
write access to nr_pages is restricted by memcg_ops_btf_struct_access).

Can the BPF program read this uninitialized pointer value? This would
leak kernel stack data to the BPF program.

> +
> + if (charge_done)
> + goto done_restock;

[ ... ]

> diff --git a/mm/memcontrol_bpf.h b/mm/memcontrol_bpf.h
> new file mode 100644
> index 0..ee2815fc3
> --- /dev/null
> +++ b/mm/memcontrol_bpf.h

[ ... ]

> +static inline unsigned long
> +bpf_try_charge_memcg(struct mem_cgroup *memcg,
> +  gfp_t gfp_mask,
> +  unsigned int nr_pages,
> +  struct mem_cgroup *mem_over_limit,
> +  unsigned int reclaim_options,
> +  bool charge_done)
> +{
> + struct memcg_ops *ops;
> + struct try_charge_memcg tcm;
> + int update_nr_pages;
> +
> + if (likely(!static_branch_unlikely(&memcg_bpf_enable)))
> + goto out;
> +
> + ops = memcg_ops_get();
> + if (unlikely(!ops))
> + goto out;
> +
> + tcm.memcg = memcg;
> + tcm.gfp_mask = gfp_mask;
> + tcm.nr_pages = nr_pages;
> + tcm.mem_over_limit = mem_over_limit;
  ^^^
Here mem_over_limit gets copied into the tcm struct, which is then
passed to the BPF program at the line below.

> + tcm.reclaim_options = reclaim_options;
> + tcm.charge_done = charge_done;
> +
> + update_nr_pages = ops->try_charge_memcg(&tcm);

[ ... ]


---
AI reviewed your patch. Please fix the bug or email reply why it's not a bug.
See: https://github.com/kernel-patches/vmtest/blob/master/ci/claude/README.md

CI run summary: https://github.com/kernel-patches/bpf/actions/runs/19486904708