[hackers] [libgrapheme] Generalize benchmark-function with payload-struct || Laslo Hunhold

2022-01-04 Thread git
commit e7b4a99ac11124212811a345563e65d199b1fb79
Author: Laslo Hunhold 
AuthorDate: Tue Jan 4 18:10:09 2022 +0100
Commit: Laslo Hunhold 
CommitDate: Tue Jan 4 18:10:09 2022 +0100

Generalize benchmark-function with payload-struct

Signed-off-by: Laslo Hunhold 

diff --git a/benchmark/character.c b/benchmark/character.c
index 84ea33f..57d7990 100644
--- a/benchmark/character.c
+++ b/benchmark/character.c
@@ -8,61 +8,68 @@
 #include "../gen/character-test.h"
 #include "util.h"
 
-#include 
 #include 
 
-#define NUM_ITERATIONS 1
+#define NUM_ITERATIONS 10
 
 #if defined __has_attribute
#if __has_attribute(optnone)
-   void libgrapheme(const uint32_t *, size_t) 
__attribute__((optnone));
-   void libutf8proc(const uint32_t *, size_t) 
__attribute__((optnone));
+   void libgrapheme(const void *) __attribute__((optnone));
+   void libutf8proc(const void *) __attribute__((optnone));
#endif
 #endif
 
+struct payload {
+   uint_least32_t *buf;
+   size_t bufsiz;
+};
+
 void
-libgrapheme(const uint32_t *buf, size_t bufsiz)
+libgrapheme(const void *payload)
 {
GRAPHEME_STATE state = { 0 };
+   const struct payload *p = payload;
size_t i;
 
-   for (i = 0; i + 1 < bufsiz; i++) {
-   (void)grapheme_is_character_break(buf[i], buf[i+1], &state);
+   for (i = 0; i + 1 < p->bufsiz; i++) {
+   (void)grapheme_is_character_break(p->buf[i], p->buf[i+1],
+ &state);
}
 }
 
 void
-libutf8proc(const uint32_t *buf, size_t bufsiz)
+libutf8proc(const void *payload)
 {
utf8proc_int32_t state = 0;
+   const struct payload *p = payload;
size_t i;
 
-   for (i = 0; i + 1 < bufsiz; i++) {
-   (void)utf8proc_grapheme_break_stateful(buf[i], buf[i+1], 
&state);
+   for (i = 0; i + 1 < p->bufsiz; i++) {
+   (void)utf8proc_grapheme_break_stateful(p->buf[i], p->buf[i+1],
+  &state);
}
 }
 
 int
 main(int argc, char *argv[])
 {
-   size_t bufsiz;
-   uint32_t *buf;
+   struct payload p;
double baseline = NAN;
 
(void)argc;
 
-   if ((buf = generate_test_buffer(character_test, LEN(character_test),
-   &bufsiz)) == NULL) {
+   if ((p.buf = generate_test_buffer(character_test, LEN(character_test),
+ &(p.bufsiz))) == NULL) {
return 1;
}
 
printf("%s\n", argv[0]);
-   run_benchmark(libgrapheme, "libgrapheme ", &baseline, buf, bufsiz,
+   run_benchmark(libgrapheme, &p, "libgrapheme ", &baseline,
  NUM_ITERATIONS);
-   run_benchmark(libutf8proc, "libutf8proc ", &baseline, buf, bufsiz,
+   run_benchmark(libutf8proc, &p, "libutf8proc ", &baseline,
  NUM_ITERATIONS);
 
-   free(buf);
+   free(p.buf);
 
return 0;
 }
diff --git a/benchmark/util.c b/benchmark/util.c
index a30196d..291eadd 100644
--- a/benchmark/util.c
+++ b/benchmark/util.c
@@ -38,9 +38,8 @@ time_diff(struct timespec *a, struct timespec *b)
 }
 
 void
-run_benchmark(void (*func)(const uint32_t *, size_t), const char *name,
-  double *baseline, const uint32_t *buf, size_t bufsiz,
- uint32_t num_iterations)
+run_benchmark(void (*func)(const void *), const void *payload,
+  const char *name, double *baseline, uint32_t num_iterations)
 {
struct timespec start, end;
size_t i;
@@ -51,7 +50,7 @@ run_benchmark(void (*func)(const uint32_t *, size_t), const 
char *name,
 
clock_gettime(CLOCK_MONOTONIC, &start);
for (i = 0; i < num_iterations; i++) {
-   func(buf, bufsiz);
+   func(payload);
 
if (i % (num_iterations / 10) == 0) {
printf(".");
@@ -59,13 +58,13 @@ run_benchmark(void (*func)(const uint32_t *, size_t), const 
char *name,
}
}
clock_gettime(CLOCK_MONOTONIC, &end);
-   diff = time_diff(&start, &end);
+   diff = time_diff(&start, &end) / num_iterations;
 
if (isnan(*baseline)) {
*baseline = diff;
-   printf(" %.3fs (baseline)\n", diff);
+   printf(" avg. %.3es (baseline)\n", diff);
} else {
-   printf(" %.3fs (%.2f%% %s)\n", diff,
+   printf(" avg. %.3es (%.2f%% %s)\n", diff,
   fabs(1.0 - diff / *baseline) * 100,
   (diff < *baseline) ? "faster" : "slower");
}
diff --git a/benchmark/util.h b/benchmark/util.h
index 13320cc..380d48c 100644
--- a/benchmark/util.h
+++ b/benchmark/util.h
@@ -7,7 +7,7 @@
 #define LEN(x) (sizeof(x) / sizeof(*(x)))
 
 uint32_t *generate_test_buffer(const struct test *, size_t, size_t *);
-void run_benchmark(void (*func)(const uint32_t *, size_t), 

[hackers] [libgrapheme] Generate separate utf8proc_int32_t buffer to preserve strict aliasing || Laslo Hunhold

2022-01-04 Thread git
commit 602ae9b2041df6c7e2b1d9f9da2b5ae57eb94b64
Author: Laslo Hunhold 
AuthorDate: Tue Jan 4 18:29:30 2022 +0100
Commit: Laslo Hunhold 
CommitDate: Tue Jan 4 18:33:00 2022 +0100

Generate separate utf8proc_int32_t buffer to preserve strict aliasing

This clearly shows why it's never a good idea to roll your own types
and to better stick with ones provided by the standard library.

Even if the custom type in libutf8proc was defined as an unsigned
32-bit integer type, it could be changed at any point (e.g. to
uint_fast32_t which might default to an unsigned 64 bit type). So we
can't simply cast between the pointers anyway, even if we didn't care
about strict aliasing.

Signed-off-by: Laslo Hunhold 

diff --git a/benchmark/character.c b/benchmark/character.c
index 57d7990..53bb30a 100644
--- a/benchmark/character.c
+++ b/benchmark/character.c
@@ -1,8 +1,10 @@
 /* See LICENSE file for copyright and license details. */
+#include 
 #include 
 #include 
 #include 
 #include 
+#include 
 
 #include "../grapheme.h"
 #include "../gen/character-test.h"
@@ -21,6 +23,7 @@
 
 struct payload {
uint_least32_t *buf;
+   utf8proc_int32_t *buf_int32;
size_t bufsiz;
 };
 
@@ -45,7 +48,8 @@ libutf8proc(const void *payload)
size_t i;
 
for (i = 0; i + 1 < p->bufsiz; i++) {
-   (void)utf8proc_grapheme_break_stateful(p->buf[i], p->buf[i+1],
+   (void)utf8proc_grapheme_break_stateful(p->buf_int32[i],
+  p->buf_int32[i+1],
   &state);
}
 }
@@ -54,7 +58,8 @@ int
 main(int argc, char *argv[])
 {
struct payload p;
-   double baseline = NAN;
+   double baseline = (double)NAN;
+   size_t i;
 
(void)argc;
 
@@ -62,6 +67,17 @@ main(int argc, char *argv[])
  &(p.bufsiz))) == NULL) {
return 1;
}
+   if ((p.buf_int32 = malloc(p.bufsiz * sizeof(*(p.buf_int32 == NULL) {
+   fprintf(stderr, "malloc: %s\n", strerror(errno));
+   exit(1);
+   }
+   for (i = 0; i < p.bufsiz; i++) {
+   /*
+* there is no overflow, as we know that the maximum
+* codepoint is 0x10, which is way below 2^31
+*/
+   p.buf_int32[i] = (utf8proc_int32_t)p.buf[i];
+   }
 
printf("%s\n", argv[0]);
run_benchmark(libgrapheme, &p, "libgrapheme ", &baseline,



[hackers] [libgrapheme] Add UTF-8 decoder benchmark || Laslo Hunhold

2022-01-04 Thread git
commit 06013743a38729c531a67a63bbfd55b50badddfe
Author: Laslo Hunhold 
AuthorDate: Tue Jan 4 18:11:02 2022 +0100
Commit: Laslo Hunhold 
CommitDate: Tue Jan 4 18:37:33 2022 +0100

Add UTF-8 decoder benchmark

Here we can also see the trouble with the custom types in libutf8proc.

Signed-off-by: Laslo Hunhold 

diff --git a/Makefile b/Makefile
index 2a016ad..f30e0af 100644
--- a/Makefile
+++ b/Makefile
@@ -6,6 +6,7 @@ include config.mk
 
 BENCHMARK =\
benchmark/character\
+   benchmark/utf8-decode\
 
 DATA =\
data/emoji-data.txt\
@@ -37,6 +38,7 @@ MAN7 = man/libgrapheme.7
 all: libgrapheme.a libgrapheme.so
 
 benchmark/character.o: benchmark/character.c config.mk gen/character-test.h 
grapheme.h benchmark/util.h
+benchmark/utf8-decode.o: benchmark/utf8-decode.c config.mk 
gen/character-test.h grapheme.h benchmark/util.h
 benchmark/util.o: benchmark/util.c config.mk benchmark/util.h
 gen/character-prop.o: gen/character-prop.c config.mk gen/util.h
 gen/character-test.o: gen/character-test.c config.mk gen/util.h
@@ -51,6 +53,7 @@ test/utf8-decode.o: test/utf8-decode.c config.mk grapheme.h 
test/util.h
 test/util.o: test/util.c config.mk test/util.h
 
 benchmark/character: benchmark/character.o benchmark/util.o libgrapheme.a
+benchmark/utf8-decode: benchmark/utf8-decode.o benchmark/util.o libgrapheme.a
 gen/character-test: gen/character-test.o gen/util.o
 gen/properties: gen/properties.o gen/util.o
 test/character: test/character.o test/util.o libgrapheme.a
@@ -139,4 +142,4 @@ dist:
tar -cf - "libgrapheme-$(VERSION)" | gzip -c > 
"libgrapheme-$(VERSION).tar.gz"
rm -rf "libgrapheme-$(VERSION)"
 
-.PHONY: all test install uninstall clean clean-data dist
+.PHONY: all benchmark test install uninstall clean clean-data dist
diff --git a/benchmark/utf8-decode.c b/benchmark/utf8-decode.c
new file mode 100644
index 000..16d117e
--- /dev/null
+++ b/benchmark/utf8-decode.c
@@ -0,0 +1,120 @@
+/* See LICENSE file for copyright and license details. */
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "../grapheme.h"
+#include "../gen/character-test.h"
+#include "util.h"
+
+#include 
+
+#define NUM_ITERATIONS 10
+
+#if defined __has_attribute
+   #if __has_attribute(optnone)
+   void libgrapheme(const void *) __attribute__((optnone));
+   void libutf8proc(const void *) __attribute__((optnone));
+   #endif
+#endif
+
+struct payload {
+   char *buf_char;
+   utf8proc_uint8_t *buf_uint8;
+   size_t bufsiz;
+};
+
+void
+libgrapheme(const void *payload)
+{
+   const struct payload *p = payload;
+   uint_least32_t cp;
+   size_t ret, off;
+
+   for (off = 0; off < p->bufsiz; off += ret) {
+   if ((ret = grapheme_decode_utf8(p->buf_char + off,
+   p->bufsiz - off, &cp)) >
+   (p->bufsiz - off)) {
+   break;
+   }
+   (void)cp;
+   }
+}
+
+void
+libutf8proc(const void *payload)
+{
+   const struct payload *p = payload;
+   utf8proc_int32_t cp;
+   utf8proc_ssize_t ret;
+   size_t off;
+
+   for (off = 0; off < p->bufsiz; off += (size_t)ret) {
+   if ((ret = utf8proc_iterate(p->buf_uint8 + off,
+   (utf8proc_ssize_t)(p->bufsiz - off),
+   &cp)) < 0) {
+   break;
+   }
+   (void)cp;
+   }
+}
+
+int
+main(int argc, char *argv[])
+{
+   struct payload p;
+   size_t cpbufsiz, i, off, ret;
+   uint32_t *cpbuf;
+   double baseline = (double)NAN;
+
+   (void)argc;
+
+   if ((cpbuf = generate_test_buffer(character_test, LEN(character_test),
+ &cpbufsiz)) == NULL) {
+   return 1;
+   }
+
+   /* convert cp-buffer to utf8-data (both as char and custom uint8-type) 
*/
+   for (i = 0, p.bufsiz = 0; i < cpbufsiz; i++) {
+   p.bufsiz += grapheme_encode_utf8(cpbuf[i], NULL, 0);
+   }
+   if ((p.buf_char = malloc(p.bufsiz)) == NULL) {
+   fprintf(stderr, "malloc: %s\n", strerror(errno));
+   exit(1);
+   }
+   for (i = 0, off = 0; i < cpbufsiz; i++, off += ret) {
+   if ((ret = grapheme_encode_utf8(cpbuf[i], p.buf_char + off,
+   p.bufsiz - off)) >
+   (p.bufsiz - off)) {
+   /* shouldn't happen */
+   fprintf(stderr, "Error while converting buffer.\n");
+   exit(1);
+   }
+   }
+   if ((p.buf_uint8 = malloc(p.bufsiz)) == NULL) { 
+   fprintf(stderr, "malloc: %s\n", strerror(errno));
+   exit(1);
+   }
+   for (i = 0; i < p.bufsiz; i++) {
+   /* 
+* even if char is larger tha

[hackers] [libgrapheme] Use "#ifdef" instead of "#if defined" || Laslo Hunhold

2022-01-04 Thread git
commit 10a95f711451bc653ba5d1f14c0f514e4d854c44
Author: Laslo Hunhold 
AuthorDate: Tue Jan 4 18:47:06 2022 +0100
Commit: Laslo Hunhold 
CommitDate: Tue Jan 4 18:47:06 2022 +0100

Use "#ifdef" instead of "#if defined"

Signed-off-by: Laslo Hunhold 

diff --git a/benchmark/character.c b/benchmark/character.c
index 53bb30a..130ccc7 100644
--- a/benchmark/character.c
+++ b/benchmark/character.c
@@ -14,7 +14,7 @@
 
 #define NUM_ITERATIONS 10
 
-#if defined __has_attribute
+#ifdef __has_attribute
#if __has_attribute(optnone)
void libgrapheme(const void *) __attribute__((optnone));
void libutf8proc(const void *) __attribute__((optnone));
diff --git a/benchmark/utf8-decode.c b/benchmark/utf8-decode.c
index 16d117e..e06a77a 100644
--- a/benchmark/utf8-decode.c
+++ b/benchmark/utf8-decode.c
@@ -14,7 +14,7 @@
 
 #define NUM_ITERATIONS 10
 
-#if defined __has_attribute
+#ifdef __has_attribute
#if __has_attribute(optnone)
void libgrapheme(const void *) __attribute__((optnone));
void libutf8proc(const void *) __attribute__((optnone));



[hackers] [libgrapheme] Mark likely branches || Laslo Hunhold

2022-01-04 Thread git
commit e917e53a05de7cab2591d6b2fa3f2ce5ece89bcf
Author: Laslo Hunhold 
AuthorDate: Tue Jan 4 18:56:38 2022 +0100
Commit: Laslo Hunhold 
CommitDate: Tue Jan 4 18:56:38 2022 +0100

Mark likely branches

The likely() and unlikely() macros (known from the Linux kernel) in
src/util.h are defined in a portable way.

This addition brings a performance improvment of around 6%, which also
shows how well-optimized grapheme_is_character_break() already is. One
break check for two codepoints now takes only around 10µs on average on
my machine, which is insanely fast when you consider the complexity of
the algorithm behind it.

Signed-off-by: Laslo Hunhold 

diff --git a/src/character.c b/src/character.c
index 80a9a79..27e1cf0 100644
--- a/src/character.c
+++ b/src/character.c
@@ -105,10 +105,10 @@ static const uint_least16_t dont_break_gb12_13[2 * 
NUM_BREAK_PROPS] = {
 static enum break_property
 get_break_prop(uint_least32_t cp)
 {
-   if (cp > 0x10) {
-   return BREAK_PROP_OTHER;
-   } else {
+   if (likely(cp <= 0x10)) {
return prop[minor[major[cp >> 8] + (cp & 0xff)]].break_property;
+   } else {
+   return BREAK_PROP_OTHER;
}
 }
 
@@ -118,11 +118,11 @@ grapheme_is_character_break(uint_least32_t cp0, 
uint_least32_t cp1, GRAPHEME_STA
enum break_property cp0_prop, cp1_prop;
bool notbreak = false;
 
-   if (state) {
-   if (!state->prop_set) {
-   cp0_prop = get_break_prop(cp0);
-   } else {
+   if (likely(state)) {
+   if (likely(state->prop_set)) {
cp0_prop = state->prop;
+   } else {
+   cp0_prop = get_break_prop(cp0);
}
cp1_prop = get_break_prop(cp1);
 
@@ -153,7 +153,7 @@ grapheme_is_character_break(uint_least32_t cp0, 
uint_least32_t cp1, GRAPHEME_STA
UINT16_C(1 << cp1_prop));
 
/* update or reset flags (when we have a break) */
-   if (!notbreak) {
+   if (likely(!notbreak)) {
state->gb11_flag = state->gb12_13_flag = false;
}
} else {
diff --git a/src/util.h b/src/util.h
index 049af2f..b61a026 100644
--- a/src/util.h
+++ b/src/util.h
@@ -10,4 +10,19 @@
 
 #define LEN(x) (sizeof(x) / sizeof(*(x)))
 
+#undef likely
+#undef unlikely
+#ifdef __has_builtin
+   #if __has_builtin(__builtin_expect)
+   #define likely(expr) __builtin_expect(!!(expr), 1)
+   #define unlikely(expr) __builtin_expect(!!(expr), 0)
+   #else
+   #define likely(expr) (expr)
+   #define unlikely(expr) (expr)
+   #endif
+#else
+   #define likely(expr) (expr)
+   #define unlikely(expr) (expr)
+#endif
+
 #endif /* UTIL_H */



Re: [hackers] [libgrapheme] Mark likely branches || Laslo Hunhold

2022-01-04 Thread NRK
> +#ifdef __has_builtin
> + #if __has_builtin(__builtin_expect)
> + #define likely(expr) __builtin_expect(!!(expr), 1)
> + #define unlikely(expr) __builtin_expect(!!(expr), 0)
> + #else
> + #define likely(expr) (expr)
> + #define unlikely(expr) (expr)
> + #endif
> +#else
> + #define likely(expr) (expr)
> + #define unlikely(expr) (expr)
> +#endif

Just curious, why not use:

#if defined(__has_builtin) && __has_builtin(__builtin_expect)
#define likely(expr) __builtin_expect(!!(expr), 1)
#define unlikely(expr) __builtin_expect(!!(expr), 0)
#else
#define likely(expr) (expr)
#define unlikely(expr) (expr)
#endif


- NRK



Re: [hackers] [libgrapheme] Mark likely branches || Laslo Hunhold

2022-01-04 Thread NRK
On Wed, Jan 05, 2022 at 01:56:26AM +0600, NRK wrote:
> Just curious, why not use:
> 
> #if defined(__has_builtin) && __has_builtin(__builtin_expect)
>   #define likely(expr) __builtin_expect(!!(expr), 1)
>   #define unlikely(expr) __builtin_expect(!!(expr), 0)
> #else
>   #define likely(expr) (expr)
>   #define unlikely(expr) (expr)
> #endif

Answering my own question: because it fails if `__has_builtin` is not
defined. I was expecting the 2nd expression wouldn't get evaluated at
all. Should probably take some time and learn more about the
pre-processor sometimes.

- NRK



Re: [hackers] [libgrapheme] Mark likely branches || Laslo Hunhold

2022-01-04 Thread Silvan Jegen
NRK  wrote:
> On Wed, Jan 05, 2022 at 01:56:26AM +0600, NRK wrote:
> > Just curious, why not use:
> > 
> > #if defined(__has_builtin) && __has_builtin(__builtin_expect)
> > #define likely(expr) __builtin_expect(!!(expr), 1)
> > #define unlikely(expr) __builtin_expect(!!(expr), 0)
> > #else
> > #define likely(expr) (expr)
> > #define unlikely(expr) (expr)
> > #endif
> 
> Answering my own question: because it fails if `__has_builtin` is not
> defined. I was expecting the 2nd expression wouldn't get evaluated at
> all. Should probably take some time and learn more about the
> pre-processor sometimes.

Same for me! I didn't know that the C pre-processor doesn't
short-circuit ...

Thanks for answering your own question! As you can see, it's
useful for others as well :)


Cheers,

Silvan