> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h

[ ... ]

> diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
> --- a/kernel/bpf/trampoline.c
> +++ b/kernel/bpf/trampoline.c
> @@ -1447,6 +1447,276 @@ int __weak arch_bpf_trampoline_size(const struct 
> btf_func_model *m, u32 flags,
>       return -ENOTSUPP;
>  }
>
> +#if defined(CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS) && \
> +    defined(CONFIG_HAVE_SINGLE_FTRACE_DIRECT_OPS) && \
> +    defined(CONFIG_BPF_SYSCALL)
> +
> +static void trampoline_lock_all(void)
> +{
> +     int i;
> +
> +     for (i = 0; i < TRAMPOLINE_LOCKS_TABLE_SIZE; i++)
> +             mutex_lock(&trampoline_locks[i].mutex);
> +}
> +
> +static void trampoline_unlock_all(void)
> +{
> +     int i;
> +
> +     for (i = 0; i < TRAMPOLINE_LOCKS_TABLE_SIZE; i++)
> +             mutex_unlock(&trampoline_locks[i].mutex);
> +}
> +
> +static void remove_tracing_multi_data(struct bpf_tracing_multi_data *data)
> +{
> +     ftrace_hash_remove(data->reg);
> +     ftrace_hash_remove(data->unreg);
> +     ftrace_hash_remove(data->modify);
> +}
> +
> +static void clear_tracing_multi_data(struct bpf_tracing_multi_data *data)
> +{
> +     remove_tracing_multi_data(data);
> +
> +     free_ftrace_hash(data->reg);
> +     free_ftrace_hash(data->unreg);
> +     free_ftrace_hash(data->modify);
> +}
> +
> +static int init_tracing_multi_data(struct bpf_tracing_multi_data *data)
> +{
> +     data->reg    = alloc_ftrace_hash(FTRACE_HASH_DEFAULT_BITS);
> +     data->unreg  = alloc_ftrace_hash(FTRACE_HASH_DEFAULT_BITS);
> +     data->modify = alloc_ftrace_hash(FTRACE_HASH_DEFAULT_BITS);
> +
> +     if (!data->reg || !data->unreg || !data->modify) {
> +             clear_tracing_multi_data(data);
> +             return -ENOMEM;
> +     }
> +     return 0;
> +}
> +
> +static void ftrace_hash_add(struct ftrace_hash *hash, struct 
> ftrace_func_entry *entry,
> +                         unsigned long ip, unsigned long direct)
> +{
> +     entry->ip = ip;
> +     entry->direct = direct;
> +     add_ftrace_hash_entry(hash, entry);
> +}
> +
> +static int register_fentry_multi(struct bpf_trampoline *tr, struct 
> bpf_tramp_image *im, void *ptr)
> +{
> +     unsigned long addr = (unsigned long) im->image;
> +     unsigned long ip = ftrace_location(tr->ip);
> +     struct bpf_tracing_multi_data *data = ptr;
> +
> +     if (bpf_trampoline_use_jmp(tr->flags))
> +             addr = ftrace_jmp_set(addr);
> +
> +     ftrace_hash_add(data->reg, data->entry, ip, addr);
> +     tr->cur_image = im;
> +     return 0;
> +}
> +
> +static int unregister_fentry_multi(struct bpf_trampoline *tr, u32 
> orig_flags, void *ptr)
> +{
> +     unsigned long addr = (unsigned long) tr->cur_image->image;
> +     unsigned long ip = ftrace_location(tr->ip);
> +     struct bpf_tracing_multi_data *data = ptr;
> +
> +     if (bpf_trampoline_use_jmp(tr->flags))
> +             addr = ftrace_jmp_set(addr);
> +
> +     ftrace_hash_add(data->unreg, data->entry, ip, addr);
> +     tr->cur_image = NULL;
> +     return 0;
> +}
> +
> +static int modify_fentry_multi(struct bpf_trampoline *tr, u32 orig_flags, 
> struct bpf_tramp_image *im,
> +                            bool lock_direct_mutex, void *ptr)
> +{
> +     unsigned long addr = (unsigned long) im->image;
> +     unsigned long ip = ftrace_location(tr->ip);
> +     struct bpf_tracing_multi_data *data = ptr;
> +
> +     if (bpf_trampoline_use_jmp(tr->flags))
> +             addr = ftrace_jmp_set(addr);
> +
> +     ftrace_hash_add(data->modify, data->entry, ip, addr);
> +     tr->cur_image = im;
> +     return 0;
> +}
> +
> +static const struct bpf_trampoline_ops trampoline_multi_ops = {
> +     .register_fentry   = register_fentry_multi,
> +     .unregister_fentry = unregister_fentry_multi,
> +     .modify_fentry     = modify_fentry_multi,
> +};
> +
> +static void bpf_trampoline_multi_attach_init(struct bpf_trampoline *tr)
> +{
> +     tr->multi_attach.old_image = tr->cur_image;
> +     tr->multi_attach.old_flags = tr->flags;
> +}
> +
> +static void bpf_trampoline_multi_attach_free(struct bpf_trampoline *tr)
> +{
> +     if (tr->multi_attach.old_image)
> +             bpf_tramp_image_put(tr->multi_attach.old_image);
> +
> +     tr->multi_attach.old_image = NULL;
> +     tr->multi_attach.old_flags = 0;
> +}
> +
> +static void bpf_trampoline_multi_attach_rollback(struct bpf_trampoline *tr)
> +{
> +     if (tr->cur_image)
> +             bpf_tramp_image_put(tr->cur_image);
> +     tr->cur_image = tr->multi_attach.old_image;
> +     tr->flags = tr->multi_attach.old_flags;
> +
> +     tr->multi_attach.old_image = NULL;
> +     tr->multi_attach.old_flags = 0;
> +}
> +
> +#define for_each_mnode_cnt(mnode, link, cnt) \
> +     for (i = 0, mnode = &link->nodes[i]; i < cnt; i++, mnode = 
> &link->nodes[i])
> +
> +#define for_each_mnode(mnode, link) \
> +     for_each_mnode_cnt(mnode, link, link->nodes_cnt)
> +
> +int bpf_trampoline_multi_attach(struct bpf_prog *prog, u32 *ids,
> +                             struct bpf_tracing_multi_link *link)
> +{
> +     struct bpf_tracing_multi_data *data = &link->data;
> +     struct bpf_attach_target_info tgt_info = {};
> +     struct btf *btf = prog->aux->attach_btf;
> +     struct bpf_tracing_multi_node *mnode;
> +     struct bpf_trampoline *tr;
> +     int i, err, rollback_cnt;
> +     u64 key;
> +
> +     for_each_mnode(mnode, link) {
> +             rollback_cnt = i;
> +
> +             err = bpf_check_attach_btf_id_multi(btf, prog, ids[i], 
> &tgt_info);
> +             if (err)
> +                     goto rollback_put;
> +
> +             key = bpf_trampoline_compute_key(NULL, btf, ids[i]);
> +
> +             tr = bpf_trampoline_get(key, &tgt_info);
> +             if (!tr) {
> +                     err = -ENOMEM;
> +                     goto rollback_put;
> +             }
> +
> +             mnode->trampoline = tr;
> +             mnode->node.link = &link->link;
> +
> +             cond_resched();
> +     }
> +
> +     err = init_tracing_multi_data(data);
> +     if (err) {
> +             rollback_cnt = link->nodes_cnt;
> +             goto rollback_put;
> +     }
> +
> +     trampoline_lock_all();
> +
> +     for_each_mnode(mnode, link) {
> +             bpf_trampoline_multi_attach_init(mnode->trampoline);
> +
> +             data->entry = &mnode->entry;
> +             err = __bpf_trampoline_link_prog(&mnode->node, 
> mnode->trampoline, NULL,
> +                                              &trampoline_multi_ops, data);
> +             if (err) {
> +                     rollback_cnt = i;
> +                     goto rollback_unlink;
> +             }
> +     }

When user-provided ids[] contains duplicate BTF IDs (or distinct IDs
that resolve to the same trampoline key), multiple nodes point to the
same struct bpf_trampoline. The link loop above then calls
bpf_trampoline_multi_attach_init() more than once on that trampoline,
overwriting the saved old_image with the newly assigned cur_image from
the previous iteration.

Scenario with ids[0] == ids[1] and trampoline X starting with OLD_X:

  i=0: attach_init(X) saves old_image=OLD_X
       __bpf_trampoline_link_prog() -> modify_fentry_multi() sets
       X->cur_image=NEW_X (OLD_X refcount not dropped, intent is for
       multi_attach_free() to release it later).

  i=1: attach_init(X) re-runs on the same trampoline and overwrites
       old_image=NEW_X (the only saved reference to OLD_X is lost).
       __bpf_trampoline_link_prog() returns -EBUSY (duplicate prog).
       rollback_cnt=i=1; goto rollback_unlink.

  rollback_unlink calls bpf_trampoline_multi_attach_rollback(X) once:

    static void bpf_trampoline_multi_attach_rollback(struct bpf_trampoline *tr)
    {
        if (tr->cur_image)
            bpf_tramp_image_put(tr->cur_image);    /* puts NEW_X */
        tr->cur_image = tr->multi_attach.old_image;  /* = NEW_X (stale!) */
        ...
    }

Result: OLD_X leaks (its refcount is never decremented). cur_image
points at NEW_X which was just released via bpf_tramp_image_put().

The rollback_put loop calls bpf_trampoline_put() which frees the
trampoline (prog was removed, progs_hlist is empty), so the dangling
NEW_X pointer is cleaned up. However OLD_X remains allocated via
module_alloc forever.

Reachability: commit dbf2afe2f603 ("bpf: Add support for tracing multi
link") copies ids[] from userspace without deduplication. The series'
selftests (commit 411fb40d4b2a "selftests/bpf: Add tracing multi attach
fails test", 'fail#7 (kernel) attach with duplicate id') exercise this
path explicitly, expecting -EBUSY.

The trigger requires the shared trampoline to already have cur_image !=
NULL when the first iteration runs (i.e., another program is already
attached to the same function), so modify_fentry_multi() is used and there is a
real OLD_X to leak. The first-time-attach case (register_fentry_multi,
OLD_X=NULL) only leaves cur_image dangling briefly until the trampoline
is freed.

Can the code save old_image/old_flags once per trampoline instead of
once per node, or store them per-mnode to avoid overwriting the saved
state?

> +
> +     rollback_cnt = link->nodes_cnt;
> +     if (ftrace_hash_count(data->reg)) {
> +             err = update_ftrace_direct_add(&direct_ops, data->reg);
> +             if (err)
> +                     goto rollback_unlink;
> +     }
> +
> +     if (ftrace_hash_count(data->modify)) {
> +             err = update_ftrace_direct_mod(&direct_ops, data->modify, true);
> +             if (err) {
> +                     WARN_ON_ONCE(update_ftrace_direct_del(&direct_ops, 
> data->reg));
> +                                                                            
> ^^^^^^^^^^
> +                     goto rollback_unlink;
> +             }
> +     }

The recovery path unconditionally calls update_ftrace_direct_del with
data->reg, but update_ftrace_direct_add is only invoked when data->reg
is non-empty:

    if (ftrace_hash_count(data->reg)) {
        err = update_ftrace_direct_add(&direct_ops, data->reg);
        ...
    }

When update_ftrace_direct_mod fails and all trampolines already had
cur_image != NULL (so nothing was added to data->reg),
update_ftrace_direct_del is called with an empty hash.
update_ftrace_direct_del at kernel/trace/ftrace.c:6511 checks:

    if (!hash_count(hash))
        return -EINVAL;

so it returns -EINVAL and the WARN_ON_ONCE fires spuriously. Should
this guard match the add path?

    if (ftrace_hash_count(data->reg))
        WARN_ON_ONCE(update_ftrace_direct_del(&direct_ops, data->reg));

> +
> +     for_each_mnode(mnode, link)
> +             bpf_trampoline_multi_attach_free(mnode->trampoline);
> +
> +     trampoline_unlock_all();
> +
> +     remove_tracing_multi_data(data);
> +     return 0;
> +
> +rollback_unlink:
> +     for_each_mnode_cnt(mnode, link, rollback_cnt) {
> +             bpf_trampoline_remove_prog(mnode->trampoline, &mnode->node);
> +             bpf_trampoline_multi_attach_rollback(mnode->trampoline);
> +     }
> +
> +     trampoline_unlock_all();
> +
> +     clear_tracing_multi_data(data);
> +     rollback_cnt = link->nodes_cnt;
> +
> +rollback_put:
> +     for_each_mnode_cnt(mnode, link, rollback_cnt)
> +             bpf_trampoline_put(mnode->trampoline);
> +
> +     return err;
> +}

[ ... ]

> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -19658,6 +19658,58 @@ struct bpf_attach_target_info tgt_info = {};
>       return 0;
>  }
>
> +int bpf_check_attach_btf_id_multi(struct btf *btf, struct bpf_prog *prog, 
> u32 btf_id,
> +                               struct bpf_attach_target_info *tgt_info)
> +{
> +     const struct btf_type *t;
> +     unsigned long addr;
> +     const char *tname;
> +     int err;
> +
> +     if (!btf_id || !btf)
> +             return -EINVAL;
> +
> +     /* Check noreturn attachment. */
> +     if (prog->expected_attach_type == BPF_TRACE_FEXIT_MULTI ||
> +          btf_id_set_contains(&noreturn_deny, btf_id))
> +             return -EINVAL;

Wrong logical operator in the noreturn check. The two subexpressions
are joined by || (where && appears intended), so the function returns
-EINVAL whenever EITHER:

  (a) the program's expected_attach_type is BPF_TRACE_FEXIT_MULTI
      (regardless of whether btf_id targets a noreturn function), OR

  (b) the btf_id is in the noreturn_deny set (regardless of program
      attach type, rejecting FENTRY_MULTI attachments to noreturn
      functions as well).

The intent is to reject only FEXIT-like multi attachments to noreturn
functions. The non-multi path in check_attach_btf_id() correctly uses
&&:

    } else if ((prog->expected_attach_type == BPF_TRACE_FEXIT ||
               prog->expected_attach_type == BPF_TRACE_FSESSION ||
               prog->expected_attach_type == BPF_MODIFY_RETURN) &&
               btf_id_set_contains(&noreturn_deny, btf_id)) {
        ...
        return -EINVAL;
    }

Consequences once bpf_trampoline_multi_attach() is wired up from
user-space (commit dbf2afe2f603 "bpf: Add support for tracing multi
link"):

  1. Every BPF_TRACE_FEXIT_MULTI program attach request is rejected
     with -EINVAL, even when the target function returns normally. This
     disables BPF_TRACE_FEXIT_MULTI entirely.

  2. Any tracing_multi program (including BPF_TRACE_FENTRY_MULTI) is
     rejected when its btf_id is in noreturn_deny (do_exit,
     __x64_sys_exit, make_task_dead, etc.). Rejecting FENTRY_MULTI on
     noreturn functions is an unintended behavioral restriction.

Forward search in the git range shows commit bfcad202ce99 ("bpf: Add
support for tracing_multi link session") rewrites this hunk to:

    if ((prog->expected_attach_type == BPF_TRACE_FEXIT_MULTI ||
         prog->expected_attach_type == BPF_TRACE_FSESSION_MULTI) &&
         btf_id_set_contains(&noreturn_deny, btf_id))
        return -EINVAL;

The parenthesization + && in the later commit confirms that the
intended logic conjoins the two conditions. Should this use && instead
of ||?

> +
> +     /* Check and get function target data. */
> +     t = btf_type_by_id(btf, btf_id);
> +     if (!t)
> +             return -EINVAL;
> +     tname = btf_name_by_offset(btf, t->name_off);
> +     if (!tname)
> +             return -EINVAL;
> +     if (!btf_type_is_func(t))
> +             return -EINVAL;
> +     t = btf_type_by_id(btf, t->type);
> +     if (!btf_type_is_func_proto(t))
> +             return -EINVAL;
> +     err = btf_distill_func_proto(NULL, btf, t, tname, &tgt_info->fmodel);
> +     if (err < 0)
> +             return err;
> +     if (btf_is_module(btf)) {
> +             /* The bpf program already holds reference to module. */
> +             if (WARN_ON_ONCE(!prog->aux->mod))
> +                     return -EINVAL;
> +             addr = find_kallsyms_symbol_value(prog->aux->mod, tname);
> +     } else {
> +             addr = kallsyms_lookup_name(tname);
> +     }
> +     if (!addr || !ftrace_location(addr))
> +             return -ENOENT;
> +
> +     /* Check sleepable program attachment. */
> +     if (prog->sleepable) {
> +             err = btf_id_allow_sleepable(btf_id, addr, prog, btf);
> +             if (err)
> +                     return err;
> +     }
> +     tgt_info->tgt_addr = addr;
> +     return 0;
> +}
> +
>  struct btf *bpf_get_btf_vmlinux(void)
>  {
>       if (!btf_vmlinux && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) {


---
AI reviewed your patch. Please fix the bug or email reply why it's not a bug.
See: https://github.com/kernel-patches/vmtest/blob/master/ci/claude/README.md

CI run summary: https://github.com/kernel-patches/bpf/actions/runs/24598000047

Reply via email to