On 2026/1/20 15:46, Andy Shevchenko wrote:
> On Tue, Jan 20, 2026 at 02:58:48PM +0800, Feng Jiang wrote:
>> Introduce a benchmarking framework to the string_kunit test suite to
>> measure the execution efficiency of string functions.
>>
>> The implementation is inspired by crc_benchmark(), measuring throughput
>> (MB/s) and latency (ns/call) across a range of string lengths. It
>> includes a warm-up phase, disables preemption during measurement, and
>> uses a fixed seed for reproducible results.
>>
>> This allows for comparing different implementations (e.g., generic C vs.
>> architecture-optimized assembly) within the KUnit environment.
>>
>> Initially, provide benchmarks for strlen().
> 
> ...
> 
>> +#define STRING_BENCH_SEED   888
>> +#define STRING_BENCH_WORKLOAD       1000000UL
> 
> Can also be (1 * MEGA) from units.h.

Fixed.

> ...
> 
>> +static const size_t bench_lens[] = {
>> +    0, 1, 7, 8, 16, 31, 64, 127, 512, 1024, 3173, 4096
> 
> Leave trailing comma.

Fixed.

> ...
> 
>> +static void *alloc_max_bench_buffer(struct kunit *test,
>> +            const size_t *lens, size_t count, size_t *buf_len)
>> +{
>> +    void *buf;
>> +    size_t i, max_len = 0;
>> +
>> +    for (i = 0; i < count; i++) {
>> +            if (max_len < lens[i])
>> +                    max_len = lens[i];
>> +    }
>> +
>> +    /* Add space for NUL terminator */
>> +    max_len += 1;
> 
>> +    buf = kunit_kzalloc(test, max_len, GFP_KERNEL);
>> +    if (buf && buf_len)
>> +            *buf_len = max_len;
>> +
>> +    return buf;
> 
>       if (!buf)
>               return NULL;
> 
>       *buf_len ...
>       return buf;
> 

Fixed.

> ...
> 
>> +static void fill_random_string(char *buf, size_t len)
>> +{
>> +    size_t i;
>> +    struct rnd_state state;
> 
> Reversed xmas tree ordering?

Fixed.

>> +    if (!buf || !len)
>> +            return;
>> +
>> +    /* Use a fixed seed to ensure deterministic benchmark results */
>> +    prandom_seed_state(&state, 888);
>> +    prandom_bytes_state(&state, buf, len);
>> +
>> +    /* Replace null bytes to avoid early string termination */
>> +    for (i = 0; i < len; i++) {
>> +            if (buf[i] == '\0')
>> +                    buf[i] = 0x01;
>> +    }
>> +
>> +    buf[len - 1] = '\0';
>> +}
> 
> ...
> 
>> +#define STRING_BENCH(iters, func, ...)                              \
> 
> Is this same / similar code to crc_benchmark()? Perhaps we need to have KUnit
> provided macro / environment to perform such tests... Have you talked to KUnit
> people about all this?
> 

I haven't reached out to the KUnit maintainers yet. This implementation is
currently a lightweight adaptation specifically for string benchmarks.
However, I agree that a generic KUnit benchmarking harness would be
beneficial for the kernel. For now, I'll refine this version based on your
feedback.

>> +({                                                                  \
>> +    u64 __bn_t;                                                     \
>> +    size_t __bn_i;                                                  \
>> +    size_t __bn_iters = (iters);                                    \
>> +    size_t __bn_warm_iters = max_t(size_t, __bn_iters / 10, 50U);   \
> 
> Try to avoid max_t() as much as possible. Wouldn't max() suffice?
> 

Will do.

>> +    /* Volatile function pointer prevents dead code elimination */  \
>> +    typeof(func) (* volatile __func) = (func);                      \
>> +                                                                    \
>> +    for (__bn_i = 0; __bn_i < __bn_warm_iters; __bn_i++)            \
>> +            (void)__func(__VA_ARGS__);                              \
>> +                                                                    \
>> +    preempt_disable();                                              \
>> +    __bn_t = ktime_get_ns();                                        \
>> +    for (__bn_i = 0; __bn_i < __bn_iters; __bn_i++)                 \
>> +            (void)__func(__VA_ARGS__);                              \
>> +    __bn_t = ktime_get_ns() - __bn_t;                               \
>> +    preempt_enable();                                               \
>> +    __bn_t;                                                         \
>> +})
>> +
>> +/**
>> + * STRING_BENCH_BUF() - Benchmark harness for single-buffer functions.
>> + * @test: KUnit context.
>> + * @buf_name: Local char * variable name to be defined.
>> + * @buf_size: Local size_t variable name to be defined.
>> + * @func: Function to benchmark.
>> + * @...: Extra arguments for @func.
>> + *
>> + * Prepares a randomized, null-terminated buffer and iterates through lengths
>> + * in bench_lens, defining @buf_name and @buf_size in each loop.
>> + */
>> +#define STRING_BENCH_BUF(test, buf_name, buf_size, func, ...)       \
>> +do {                                                                \
>> +    char *buf_name, *_bn_buf;                                       \
>> +    size_t buf_size, _bn_i, _bn_iters, _bn_size = 0;                \
>> +    u64 _bn_t, _bn_mbps = 0, _bn_lat = 0;                           \
>> +                                                                    \
>> +    if (!IS_ENABLED(CONFIG_STRING_KUNIT_BENCH))                     \
>> +            kunit_skip(test, "not enabled");                        \
>> +                                                                    \
>> +    _bn_buf = alloc_max_bench_buffer(test, bench_lens,              \
>> +                    ARRAY_SIZE(bench_lens), &_bn_size);             \
>> +    KUNIT_ASSERT_NOT_ERR_OR_NULL(test, _bn_buf);                    \
>> +                                                                    \
>> +    fill_random_string(_bn_buf, _bn_size);                          \
> 
>> +    _bn_buf[_bn_size - 1] = '\0';                                   \
> 
> You have already this there in the function, no?
> 

Indeed, that's redundant. I'll remove it.

>> +    for (_bn_i = 0; _bn_i < ARRAY_SIZE(bench_lens); _bn_i++) {      \
>> +            buf_size = bench_lens[_bn_i];                           \
>> +            buf_name = _bn_buf + _bn_size - buf_size - 1;           \
>> +            _bn_iters = STRING_BENCH_WORKLOAD /                     \
>> +                            max_t(size_t, buf_size, 1U);            \
> 
> max()

Fixed.

>> +            _bn_t = STRING_BENCH(_bn_iters, func, ##__VA_ARGS__);   \
>> +                                                                    \
>> +            if (_bn_t > 0) {                                        \
>> +                    _bn_mbps = (u64)(buf_size) * _bn_iters * 1000;  \
>> +                    _bn_mbps = div64_u64(_bn_mbps, _bn_t);          \
>> +                    _bn_lat = div64_u64(_bn_t, _bn_iters);          \
>> +            }                                                       \
>> +            kunit_info(test, "len=%zu: %llu MB/s (%llu ns/call)\n", \
>> +                            buf_size, _bn_mbps, _bn_lat);           \
>> +    }                                                               \
>> +} while (0)
> 

Thanks for the catch. I will incorporate all your suggestions into v4.

-- 
With Best Regards,
Feng Jiang


Reply via email to