On Mon, Jan 12, 2026 at 1:50 PM Jiri Olsa <[email protected]> wrote:
>
> Adding support to call bpf_get_stackid helper from trigger programs,
> so far added for kprobe multi.
>
> Adding the --stacktrace/-g option to enable it.
>
> Signed-off-by: Jiri Olsa <[email protected]>
> ---
>  tools/testing/selftests/bpf/bench.c           |  4 ++++
>  tools/testing/selftests/bpf/bench.h           |  1 +
>  .../selftests/bpf/benchs/bench_trigger.c      |  1 +
>  .../selftests/bpf/progs/trigger_bench.c       | 18 ++++++++++++++++++
>  4 files changed, 24 insertions(+)
>
This now actually becomes a stack trace benchmark :) But I don't mind,
I think it would be good to be able to benchmark this. But then I think
we should implement it for all the different tracing program types (tp,
raw_tp, fentry/fexit/fmod_ret), for consistency and so that we can
compare and contrast them?
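E.g., for the fentry case it would presumably be just the following
(untested sketch, assuming the existing bench_trigger_fentry program in
trigger_bench.c and modulo whatever kernel-side support each program
type still needs for bpf_get_stackid):

SEC("?fentry/bpf_get_numa_node_id")
int bench_trigger_fentry(void *ctx)
{
	/* same pattern as the kprobe-multi case below */
	inc_counter();
	do_stacktrace(ctx);
	return 0;
}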
> diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
> index bd29bb2e6cb5..8dadd9c928ec 100644
> --- a/tools/testing/selftests/bpf/bench.c
> +++ b/tools/testing/selftests/bpf/bench.c
> @@ -265,6 +265,7 @@ static const struct argp_option opts[] = {
>  	{ "verbose", 'v', NULL, 0, "Verbose debug output"},
>  	{ "affinity", 'a', NULL, 0, "Set consumer/producer thread affinity"},
>  	{ "quiet", 'q', NULL, 0, "Be more quiet"},
> +	{ "stacktrace", 'g', NULL, 0, "Get stack trace"},
bikeshedding time: why -g? why not -S or something like that?
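FWIW, assuming the trig-kprobe-multi benchmark name, that would make the
invocation something like:

  ./bench trig-kprobe-multi -S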
> { "prod-affinity", ARG_PROD_AFFINITY_SET, "CPUSET", 0,
> "Set of CPUs for producer threads; implies --affinity"},
> { "cons-affinity", ARG_CONS_AFFINITY_SET, "CPUSET", 0,
> @@ -350,6 +351,9 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
>  	case 'q':
>  		env.quiet = true;
>  		break;
> +	case 'g':
> +		env.stacktrace = true;
> +		break;
>  	case ARG_PROD_AFFINITY_SET:
>  		env.affinity = true;
>  		if (parse_num_list(arg, &env.prod_cpus.cpus,
> diff --git a/tools/testing/selftests/bpf/bench.h b/tools/testing/selftests/bpf/bench.h
> index bea323820ffb..7cf21936e7ed 100644
> --- a/tools/testing/selftests/bpf/bench.h
> +++ b/tools/testing/selftests/bpf/bench.h
> @@ -26,6 +26,7 @@ struct env {
>  	bool list;
>  	bool affinity;
>  	bool quiet;
> +	bool stacktrace;
>  	int consumer_cnt;
>  	int producer_cnt;
>  	int nr_cpus;
> diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c
> index 34018fc3927f..aeec9edd3851 100644
> --- a/tools/testing/selftests/bpf/benchs/bench_trigger.c
> +++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c
> @@ -146,6 +146,7 @@ static void setup_ctx(void)
>  	bpf_program__set_autoload(ctx.skel->progs.trigger_driver, true);
> 
>  	ctx.skel->rodata->batch_iters = args.batch_iters;
> +	ctx.skel->rodata->stacktrace = env.stacktrace;
>  }
> 
>  static void load_ctx(void)
> diff --git a/tools/testing/selftests/bpf/progs/trigger_bench.c b/tools/testing/selftests/bpf/progs/trigger_bench.c
> index 2898b3749d07..479400d96fa4 100644
> --- a/tools/testing/selftests/bpf/progs/trigger_bench.c
> +++ b/tools/testing/selftests/bpf/progs/trigger_bench.c
> @@ -25,6 +25,23 @@ static __always_inline void inc_counter(void)
>  	__sync_add_and_fetch(&hits[cpu & CPU_MASK].value, 1);
>  }
>
> +volatile const int stacktrace;
> +
> +typedef __u64 stack_trace_t[128];
> +
> +struct {
> +	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
> +	__uint(max_entries, 16384);
> +	__type(key, __u32);
> +	__type(value, stack_trace_t);
> +} stackmap SEC(".maps");
> +
> +static __always_inline void do_stacktrace(void *ctx)
> +{
> +	if (stacktrace)
> +		bpf_get_stackid(ctx, &stackmap, 0);
> +}
> +
> SEC("?uprobe")
> int bench_trigger_uprobe(void *ctx)
> {
> @@ -96,6 +113,7 @@ SEC("?kprobe.multi/bpf_get_numa_node_id")
>  int bench_trigger_kprobe_multi(void *ctx)
>  {
>  	inc_counter();
> +	do_stacktrace(ctx);
>  	return 0;
>  }
>
> --
> 2.52.0
>