On Fri, Sep 5, 2025 at 8:13 AM Xu Kuohai <[email protected]> wrote: > > From: Xu Kuohai <[email protected]> > > Add rb-prod test for bpf ring buffer to bench producer performance > without counsumer thread. And add --rb-overwrite option to bench > ring buffer in overwrite mode. > > For reference, below are bench numbers collected from x86_64 and > arm64 CPUs. > > - AMD EPYC 9654 (x86_64) > > Ringbuf, overwrite mode with multi-producer contention, no consumer > =================================================================== > rb-prod nr_prod 1 32.295 ± 0.004M/s (drops 0.000 ± 0.000M/s) > rb-prod nr_prod 2 9.591 ± 0.003M/s (drops 0.000 ± 0.000M/s) > rb-prod nr_prod 3 8.895 ± 0.002M/s (drops 0.000 ± 0.000M/s) > rb-prod nr_prod 4 9.206 ± 0.003M/s (drops 0.000 ± 0.000M/s) > rb-prod nr_prod 8 9.220 ± 0.002M/s (drops 0.000 ± 0.000M/s) > rb-prod nr_prod 12 4.595 ± 0.022M/s (drops 0.000 ± 0.000M/s) > rb-prod nr_prod 16 4.348 ± 0.016M/s (drops 0.000 ± 0.000M/s) > rb-prod nr_prod 20 3.957 ± 0.017M/s (drops 0.000 ± 0.000M/s) > rb-prod nr_prod 24 3.787 ± 0.014M/s (drops 0.000 ± 0.000M/s) > rb-prod nr_prod 28 3.603 ± 0.011M/s (drops 0.000 ± 0.000M/s) > rb-prod nr_prod 32 3.707 ± 0.011M/s (drops 0.000 ± 0.000M/s) > rb-prod nr_prod 36 3.562 ± 0.012M/s (drops 0.000 ± 0.000M/s) > rb-prod nr_prod 40 3.616 ± 0.012M/s (drops 0.000 ± 0.000M/s) > rb-prod nr_prod 44 3.598 ± 0.016M/s (drops 0.000 ± 0.000M/s) > rb-prod nr_prod 48 3.555 ± 0.014M/s (drops 0.000 ± 0.000M/s) > rb-prod nr_prod 52 3.463 ± 0.020M/s (drops 0.000 ± 0.000M/s) > > - HiSilicon Kunpeng 920 (arm64) > > Ringbuf, overwrite mode with multi-producer contention, no consumer > =================================================================== > rb-prod nr_prod 1 14.687 ± 0.058M/s (drops 0.000 ± 0.000M/s) > rb-prod nr_prod 2 22.263 ± 0.007M/s (drops 0.000 ± 0.000M/s) > rb-prod nr_prod 3 5.736 ± 0.003M/s (drops 0.000 ± 0.000M/s) > rb-prod nr_prod 4 4.934 ± 0.001M/s (drops 0.000 ± 0.000M/s) > rb-prod nr_prod 8 4.661 ± 
0.001M/s (drops 0.000 ± 0.000M/s) > rb-prod nr_prod 12 3.753 ± 0.013M/s (drops 0.000 ± 0.000M/s) > rb-prod nr_prod 16 3.706 ± 0.018M/s (drops 0.000 ± 0.000M/s) > rb-prod nr_prod 20 3.660 ± 0.015M/s (drops 0.000 ± 0.000M/s) > rb-prod nr_prod 24 3.610 ± 0.016M/s (drops 0.000 ± 0.000M/s) > rb-prod nr_prod 28 3.238 ± 0.010M/s (drops 0.000 ± 0.000M/s) > rb-prod nr_prod 32 3.270 ± 0.018M/s (drops 0.000 ± 0.000M/s) > rb-prod nr_prod 36 2.892 ± 0.021M/s (drops 0.000 ± 0.000M/s) > rb-prod nr_prod 40 2.995 ± 0.018M/s (drops 0.000 ± 0.000M/s) > rb-prod nr_prod 44 2.830 ± 0.019M/s (drops 0.000 ± 0.000M/s) > rb-prod nr_prod 48 2.877 ± 0.015M/s (drops 0.000 ± 0.000M/s) > rb-prod nr_prod 52 2.814 ± 0.015M/s (drops 0.000 ± 0.000M/s) > > Signed-off-by: Xu Kuohai <[email protected]> > --- > tools/testing/selftests/bpf/bench.c | 2 + > .../selftests/bpf/benchs/bench_ringbufs.c | 95 +++++++++++++++++-- > .../bpf/benchs/run_bench_ringbufs.sh | 4 + > .../selftests/bpf/progs/ringbuf_bench.c | 10 ++ > 4 files changed, 103 insertions(+), 8 deletions(-) > > diff --git a/tools/testing/selftests/bpf/bench.c > b/tools/testing/selftests/bpf/bench.c > index bd29bb2e6cb5..a98063f6436a 100644 > --- a/tools/testing/selftests/bpf/bench.c > +++ b/tools/testing/selftests/bpf/bench.c > @@ -541,6 +541,7 @@ extern const struct bench bench_trig_uretprobe_multi_nop5; > > extern const struct bench bench_rb_libbpf; > extern const struct bench bench_rb_custom; > +extern const struct bench bench_rb_prod; > extern const struct bench bench_pb_libbpf; > extern const struct bench bench_pb_custom; > extern const struct bench bench_bloom_lookup; > @@ -617,6 +618,7 @@ static const struct bench *benchs[] = { > /* ringbuf/perfbuf benchmarks */ > &bench_rb_libbpf, > &bench_rb_custom, > + &bench_rb_prod, > &bench_pb_libbpf, > &bench_pb_custom, > &bench_bloom_lookup, > diff --git a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c > b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c > index e1ee979e6acc..6d58479fac91 
100644 > --- a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c > +++ b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c > @@ -19,6 +19,7 @@ static struct { > int ringbuf_sz; /* per-ringbuf, in bytes */ > bool ringbuf_use_output; /* use slower output API */ > int perfbuf_sz; /* per-CPU size, in pages */ > + bool overwrite; > } args = { > .back2back = false, > .batch_cnt = 500, > @@ -27,6 +28,7 @@ static struct { > .ringbuf_sz = 512 * 1024, > .ringbuf_use_output = false, > .perfbuf_sz = 128, > + .overwrite = false, > }; > > enum { > @@ -35,6 +37,7 @@ enum { > ARG_RB_BATCH_CNT = 2002, > ARG_RB_SAMPLED = 2003, > ARG_RB_SAMPLE_RATE = 2004, > + ARG_RB_OVERWRITE = 2005, > }; > > static const struct argp_option opts[] = { > @@ -43,6 +46,7 @@ static const struct argp_option opts[] = { > { "rb-batch-cnt", ARG_RB_BATCH_CNT, "CNT", 0, "Set BPF-side record > batch count"}, > { "rb-sampled", ARG_RB_SAMPLED, NULL, 0, "Notification sampling"}, > { "rb-sample-rate", ARG_RB_SAMPLE_RATE, "RATE", 0, "Notification > sample rate"}, > + { "rb-overwrite", ARG_RB_OVERWRITE, NULL, 0, "Overwrite mode"}, > {}, > }; > > @@ -72,6 +76,9 @@ static error_t parse_arg(int key, char *arg, struct > argp_state *state) > argp_usage(state); > } > break; > + case ARG_RB_OVERWRITE: > + args.overwrite = true; > + break; > default: > return ARGP_ERR_UNKNOWN; > } > @@ -95,8 +102,30 @@ static inline void bufs_trigger_batch(void) > > static void bufs_validate(void) > { > - if (env.consumer_cnt != 1) { > - fprintf(stderr, "rb-libbpf benchmark needs one consumer!\n"); > + bool bench_prod = !strcmp(env.bench_name, "rb-prod"); > + > + if (args.overwrite && !bench_prod) { > + fprintf(stderr, "overwite mode only works with benchmakr > rb-prod!\n"); > + exit(1); > + } > + > + if (bench_prod && env.consumer_cnt != 0) { > + fprintf(stderr, "rb-prod benchmark does not need > consumer!\n"); > + exit(1); > + } > + > + if (bench_prod && args.back2back) { > + fprintf(stderr, "back-to-back mode makes no sense for > 
rb-prod!\n"); > + exit(1); > + } > + > + if (bench_prod && args.sampled) { > + fprintf(stderr, "sampling mode makes no sense for > rb-prod!\n"); > + exit(1); > + } > + > + if (!bench_prod && env.consumer_cnt != 1) { > + fprintf(stderr, "benchmarks excluding rb-prod need one > consumer!\n"); > exit(1); > } > > @@ -132,8 +161,10 @@ static void ringbuf_libbpf_measure(struct bench_res *res) > res->drops = atomic_swap(&ctx->skel->bss->dropped, 0); > } > > -static struct ringbuf_bench *ringbuf_setup_skeleton(void) > +static struct ringbuf_bench *ringbuf_setup_skeleton(int bench_prod)
Why `int` here rather than `bool`? C does support bool, and this patch already uses it elsewhere (e.g. `bool overwrite` in args and `bool bench_prod` in bufs_validate()). But really, do we need another benchmark just to set overwrite mode? Can't you adapt the existing benchmarks to optionally set overwrite mode? (Also, please drop [email protected] from CC for the next revision; that email address doesn't exist anymore.) [...]

