commit bdb39f49345e3b0c7f7e26c21c62d627b9de02ca Author: Laslo Hunhold <d...@frign.de> AuthorDate: Wed Jan 5 16:02:23 2022 +0100 Commit: Laslo Hunhold <d...@frign.de> CommitDate: Wed Jan 5 16:12:27 2022 +0100
Calculate benchmark-times as average time taken for each call Add a unit-parameter and calculate all times as average time to call a function once. Having previously focused entirely on relative performance, I completely overlooked that the printed times were not for a single comparison but for one full pass over the input buffer. In this case, one codepoint break detection on my machine only takes around 7ns!!! This means that the average processing speed is, if we assume on average 1.2-1.5 bytes per codepoint, around 200 MB/s. To put this into perspective, a terminal buffer may have a grid of 70x200 = 14000 visible characters. Calculating breakpoints for all of them would only take around 0.1ms, which is, even if it were done on every draw call, a tiny fraction of the roughly 16.7ms available per frame at 60FPS. Signed-off-by: Laslo Hunhold <d...@frign.de> diff --git a/benchmark/character.c b/benchmark/character.c index 130ccc7..c64014e 100644 --- a/benchmark/character.c +++ b/benchmark/character.c @@ -80,10 +80,10 @@ main(int argc, char *argv[]) } printf("%s\n", argv[0]); - run_benchmark(libgrapheme, &p, "libgrapheme ", &baseline, - NUM_ITERATIONS); - run_benchmark(libutf8proc, &p, "libutf8proc ", &baseline, - NUM_ITERATIONS); + run_benchmark(libgrapheme, &p, "libgrapheme ", "comparison", + &baseline, NUM_ITERATIONS, p.bufsiz - 1); + run_benchmark(libutf8proc, &p, "libutf8proc ", "comparison", + &baseline, NUM_ITERATIONS, p.bufsiz - 1); free(p.buf); diff --git a/benchmark/utf8-decode.c b/benchmark/utf8-decode.c index e06a77a..ae4d275 100644 --- a/benchmark/utf8-decode.c +++ b/benchmark/utf8-decode.c @@ -107,10 +107,10 @@ main(int argc, char *argv[]) } printf("%s\n", argv[0]); - run_benchmark(libgrapheme, &p, "libgrapheme ", &baseline, - NUM_ITERATIONS); - run_benchmark(libutf8proc, &p, "libutf8proc ", &baseline, - NUM_ITERATIONS); + run_benchmark(libgrapheme, &p, "libgrapheme ", "byte", &baseline, + NUM_ITERATIONS, p.bufsiz); + run_benchmark(libutf8proc, &p, "libutf8proc ", "byte", &baseline, 
+ NUM_ITERATIONS, p.bufsiz); free(cpbuf); free(p.buf_char); diff --git a/benchmark/util.c b/benchmark/util.c index 291eadd..dbf5ccb 100644 --- a/benchmark/util.c +++ b/benchmark/util.c @@ -39,7 +39,8 @@ time_diff(struct timespec *a, struct timespec *b) void run_benchmark(void (*func)(const void *), const void *payload, - const char *name, double *baseline, uint32_t num_iterations) + const char *name, const char *unit, double *baseline, + size_t num_iterations, size_t units_per_iteration) { struct timespec start, end; size_t i; @@ -58,13 +59,13 @@ run_benchmark(void (*func)(const void *), const void *payload, } } clock_gettime(CLOCK_MONOTONIC, &end); - diff = time_diff(&start, &end) / num_iterations; + diff = time_diff(&start, &end) / num_iterations / units_per_iteration; if (isnan(*baseline)) { *baseline = diff; - printf(" avg. %.3es (baseline)\n", diff); + printf(" avg. %.3es/%s (baseline)\n", diff, unit); } else { - printf(" avg. %.3es (%.2f%% %s)\n", diff, + printf(" avg. %.3es/%s (%.2f%% %s)\n", diff, unit, fabs(1.0 - diff / *baseline) * 100, (diff < *baseline) ? "faster" : "slower"); } diff --git a/benchmark/util.h b/benchmark/util.h index 380d48c..3b3f238 100644 --- a/benchmark/util.h +++ b/benchmark/util.h @@ -8,6 +8,6 @@ uint32_t *generate_test_buffer(const struct test *, size_t, size_t *); void run_benchmark(void (*func)(const void *), const void *, const char *, - double *, uint32_t); + const char *, double *, size_t, size_t); #endif /* UTIL_H */