On Thu, Jan 15, 2026 at 10:48 AM Andrii Nakryiko
<[email protected]> wrote:
>
> On Mon, Jan 12, 2026 at 1:50 PM Jiri Olsa <[email protected]> wrote:
> >
> > Adding support to call bpf_get_stackid helper from trigger programs,
> > so far added for kprobe multi.
> >
> > Adding the --stacktrace/-g option to enable it.
> >
> > Signed-off-by: Jiri Olsa <[email protected]>
> > ---
> > tools/testing/selftests/bpf/bench.c | 4 ++++
> > tools/testing/selftests/bpf/bench.h | 1 +
> > .../selftests/bpf/benchs/bench_trigger.c | 1 +
> > .../selftests/bpf/progs/trigger_bench.c | 18 ++++++++++++++++++
> > 4 files changed, 24 insertions(+)
> >
>
> This now actually becomes a stack trace benchmark :) But I don't mind,
> I think it would be good to be able to benchmark this. But I think we
> should then implement it for all different tracing programs (tp,
> raw_tp, fentry/fexit/fmod_ret) for consistency and so we can compare
> and contrast?...
>
> > diff --git a/tools/testing/selftests/bpf/bench.c
> > b/tools/testing/selftests/bpf/bench.c
> > index bd29bb2e6cb5..8dadd9c928ec 100644
> > --- a/tools/testing/selftests/bpf/bench.c
> > +++ b/tools/testing/selftests/bpf/bench.c
> > @@ -265,6 +265,7 @@ static const struct argp_option opts[] = {
> > { "verbose", 'v', NULL, 0, "Verbose debug output"},
> > { "affinity", 'a', NULL, 0, "Set consumer/producer thread
> > affinity"},
> > { "quiet", 'q', NULL, 0, "Be more quiet"},
> > + { "stacktrace", 'g', NULL, 0, "Get stack trace"},
>
> bikeshedding time: why "g"? why not -S or something like that?
>
> > { "prod-affinity", ARG_PROD_AFFINITY_SET, "CPUSET", 0,
> > "Set of CPUs for producer threads; implies --affinity"},
> > { "cons-affinity", ARG_CONS_AFFINITY_SET, "CPUSET", 0,
> > @@ -350,6 +351,9 @@ static error_t parse_arg(int key, char *arg, struct
> > argp_state *state)
> > case 'q':
> > env.quiet = true;
> > break;
> > + case 'g':
> > + env.stacktrace = true;
> > + break;
> > case ARG_PROD_AFFINITY_SET:
> > env.affinity = true;
> > if (parse_num_list(arg, &env.prod_cpus.cpus,
> > diff --git a/tools/testing/selftests/bpf/bench.h
> > b/tools/testing/selftests/bpf/bench.h
> > index bea323820ffb..7cf21936e7ed 100644
> > --- a/tools/testing/selftests/bpf/bench.h
> > +++ b/tools/testing/selftests/bpf/bench.h
> > @@ -26,6 +26,7 @@ struct env {
> > bool list;
> > bool affinity;
> > bool quiet;
> > + bool stacktrace;
> > int consumer_cnt;
> > int producer_cnt;
> > int nr_cpus;
> > diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c
> > b/tools/testing/selftests/bpf/benchs/bench_trigger.c
> > index 34018fc3927f..aeec9edd3851 100644
> > --- a/tools/testing/selftests/bpf/benchs/bench_trigger.c
> > +++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c
> > @@ -146,6 +146,7 @@ static void setup_ctx(void)
> > bpf_program__set_autoload(ctx.skel->progs.trigger_driver, true);
> >
> > ctx.skel->rodata->batch_iters = args.batch_iters;
> > + ctx.skel->rodata->stacktrace = env.stacktrace;
> > }
> >
> > static void load_ctx(void)
> > diff --git a/tools/testing/selftests/bpf/progs/trigger_bench.c
> > b/tools/testing/selftests/bpf/progs/trigger_bench.c
> > index 2898b3749d07..479400d96fa4 100644
> > --- a/tools/testing/selftests/bpf/progs/trigger_bench.c
> > +++ b/tools/testing/selftests/bpf/progs/trigger_bench.c
> > @@ -25,6 +25,23 @@ static __always_inline void inc_counter(void)
> > __sync_add_and_fetch(&hits[cpu & CPU_MASK].value, 1);
> > }
> >
> > +volatile const int stacktrace;
> > +
> > +typedef __u64 stack_trace_t[128];
> > +
> > +struct {
> > + __uint(type, BPF_MAP_TYPE_STACK_TRACE);
> > + __uint(max_entries, 16384);
> > + __type(key, __u32);
> > + __type(value, stack_trace_t);
> > +} stackmap SEC(".maps");
Oh, and why bother with a STACK_TRACE map? Just call the bpf_get_stack()
API and keep, say, a per-CPU scratch array for the stack trace (per-CPU
so that in multi-CPU benchmarks the producers don't just contend on the
same cache lines).
> > +
> > +static __always_inline void do_stacktrace(void *ctx)
> > +{
> > + if (stacktrace)
> > + bpf_get_stackid(ctx, &stackmap, 0);
> > +}
> > +
> > SEC("?uprobe")
> > int bench_trigger_uprobe(void *ctx)
> > {
> > @@ -96,6 +113,7 @@ SEC("?kprobe.multi/bpf_get_numa_node_id")
> > int bench_trigger_kprobe_multi(void *ctx)
> > {
> > inc_counter();
> > + do_stacktrace(ctx);
> > return 0;
> > }
> >
> > --
> > 2.52.0
> >