> diff --git a/mm/bpf_memcontrol.c b/mm/bpf_memcontrol.c
> index 415e3c59b6a6..0ded1fdd0de8 100644
> --- a/mm/bpf_memcontrol.c
> +++ b/mm/bpf_memcontrol.c

[ ... ]

> static int bpf_memcg_ops_reg(void *kdata, struct bpf_link *link)
> {
>       struct bpf_struct_ops_link *ops_link
>               = container_of(link, struct bpf_struct_ops_link, link);
>       struct memcg_bpf_ops *ops = kdata;
>       struct mem_cgroup *memcg, *iter = NULL;
>       int err = 0;
>
> +     if (ops_link->flags & ~BPF_F_ALLOW_OVERRIDE) {
> +             pr_err("attach only support BPF_F_ALLOW_OVERRIDE\n");
> +             return -EOPNOTSUPP;
> +     }
> +
>       memcg = mem_cgroup_get_from_ino(ops_link->cgroup_id);
>       if (IS_ERR_OR_NULL(memcg))
>               return PTR_ERR(memcg);
>
>       cgroup_lock();
> +
> +     if (READ_ONCE(memcg->bpf_ops)) {
> +             /* Check if bpf_ops of the parent is BPF_F_ALLOW_OVERRIDE. */
> +             if (memcg->bpf_ops_flags & BPF_F_ALLOW_OVERRIDE) {
> +                     iter = parent_mem_cgroup(memcg);
> +
> +                     if (!iter)
> +                             goto busy_out;
> +                     if (READ_ONCE(iter->bpf_ops) !=
> +                         READ_ONCE(memcg->bpf_ops))
> +                             goto busy_out;
> +             } else {
> +busy_out:
> +                     err = -EBUSY;
> +                     goto unlock_out;
> +             }
> +     }
> +
>       while ((iter = mem_cgroup_iter(memcg, iter, NULL))) {
                                      ^^^^

When the override check passes (all conditions at lines 363-377 are met),
iter is set to parent_mem_cgroup(memcg) but is never reset to NULL before
this loop. Should iter be reset to NULL here?

The mem_cgroup_iter() function expects prev (iter) to be either NULL or a
previous result from iterating within memcg's descendants. When iter points
to memcg's parent, the iteration will not correctly traverse memcg's
subtree since parent is outside that subtree.

For comparison, bpf_memcg_ops_unreg() correctly resets iter to NULL before
its loop:

> +     iter = NULL;
> +     while ((iter = mem_cgroup_iter(memcg, iter, NULL))) {

>               if (READ_ONCE(iter->bpf_ops)) {
> -                     mem_cgroup_iter_break(memcg, iter);
> -                     err = -EBUSY;
> -                     break;
> +                     /* cannot override existing bpf_ops of sub-cgroup. */
> +                     continue;
>               }
>               WRITE_ONCE(iter->bpf_ops, ops);
> +             iter->bpf_ops_flags = ops_link->flags;
>       }

[ ... ]


---
AI reviewed your patch. Please fix the bug or email reply why it's not a bug.
See: https://github.com/kernel-patches/vmtest/blob/master/ci/claude/README.md

CI run summary: https://github.com/kernel-patches/bpf/actions/runs/21280790825

Reply via email to