* lib/rawmemchr.c (rawmemchr) [__CHERI__]: Use memchr instead of
one-byte reads. This sped up a simple benchmark (rawmemchr on
100,000 bytes) by 6x on a Research Morello SoC r0p0 on CheriBSD 14.
[!__CHERI__]: Use sizeof, not alignof, as better alignment
should help performance a bit on some platforms.
* modules/rawmemchr (Depends-on): Remove alignasof.
---
 ChangeLog | 10 ++++++++++
 lib/rawmemchr.c | 27 +++++++++++++++++----------
 modules/rawmemchr | 1 -
 3 files changed, 27 insertions(+), 11 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index c7fa04a173..09f0577925 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,13 @@
+2023-11-12 Paul Eggert <egg...@cs.ucla.edu>
+
+	rawmemchr: speed up, particularly on CHERI
+	* lib/rawmemchr.c (rawmemchr) [__CHERI__]: Use memchr instead of
+	one-byte reads. This sped up a simple benchmark (rawmemchr on
+	100,000 bytes) by 6x on a Research Morello SoC r0p0 on CheriBSD 14.
+	[!__CHERI__]: Use sizeof, not alignof, as better alignment
+	should help performance a bit on some platforms.
+	* modules/rawmemchr (Depends-on): Remove alignasof.
+
 2023-11-12 Bruno Haible <br...@clisp.org>
 
 	jit/cache: New module.
diff --git a/lib/rawmemchr.c b/lib/rawmemchr.c
index 137d7282a6..9386ffc628 100644
--- a/lib/rawmemchr.c
+++ b/lib/rawmemchr.c
@@ -19,7 +19,7 @@
 /* Specification. */
 #include <string.h>
 
-/* A function definition is only needed if HAVE_RAWMEMCHR is not defined. */
+/* A function definition is needed only if HAVE_RAWMEMCHR is not defined. */
 #if !HAVE_RAWMEMCHR
 
 # include <limits.h>
@@ -30,24 +30,30 @@
 void *
 rawmemchr (const void *s, int c_in)
 {
-#ifdef __CHERI__
-  /* Most architectures let you read an aligned word, even if the unsigned char
-     array at S ends in the middle of the word. However, CHERI does not. */
-  typedef unsigned char longword;
-#else
-  /* Change this typedef to experiment with performance. */
+# ifdef __CHERI__
+  /* Most architectures let you read an aligned word,
+     even if the unsigned char array at S ends in the middle of the word.
+     However CHERI does not, so call memchr
+     with the underlying object's remaining length.
+     This cannot return NULL if S points to a C_IN-terminated array.
+     Use builtins rather than including <cheri.h> which is less stable. */
+  return memchr (s, c_in, (__builtin_cheri_length_get (s)
+                           - __builtin_cheri_offset_get (s)));
+# else
+
+  /* You can change this typedef to experiment with performance. */
   typedef uintptr_t longword;
   /* Verify that the longword type lacks padding bits. */
   static_assert (UINTPTR_WIDTH == UCHAR_WIDTH * sizeof (uintptr_t));
-#endif
 
   const unsigned char *char_ptr;
   unsigned char c = c_in;
 
   /* Handle the first few bytes by reading one byte at a time.
-     Do this until CHAR_PTR is aligned on a longword boundary. */
+     Do this until CHAR_PTR is aligned on a natural longword boundary,
+     as using alignof (longword) might be slower. */
   for (char_ptr = (const unsigned char *) s;
-       (uintptr_t) char_ptr % alignof (longword) != 0;
+       (uintptr_t) char_ptr % sizeof (longword) != 0;
        ++char_ptr)
     if (*char_ptr == c)
       return (void *) char_ptr;
@@ -123,6 +129,7 @@ rawmemchr (const void *s, int c_in)
   while (*char_ptr != c)
     char_ptr++;
   return (void *) char_ptr;
+# endif
 }
 
 #endif
diff --git a/modules/rawmemchr b/modules/rawmemchr
index 5f3eeaf2d0..aba5088651 100644
--- a/modules/rawmemchr
+++ b/modules/rawmemchr
@@ -7,7 +7,6 @@ lib/rawmemchr.valgrind
 m4/rawmemchr.m4
 
 Depends-on:
-alignasof
 assert-h
 extensions
 stdint
-- 
2.40.1