* lib/rawmemchr.c (rawmemchr) [__CHERI__]: Use memchr instead of
one-byte reads.  This sped up a simple benchmark (rawmemchr on
100,000 bytes) by 6x on a Research Morello SoC r0p0 on CheriBSD 14.
[!__CHERI__]: Use sizeof, not alignof, as better alignment
should help performance a bit on some platforms.
* modules/rawmemchr (Depends-on): Remove alignasof.
---
 ChangeLog         | 10 ++++++++++
 lib/rawmemchr.c   | 27 +++++++++++++++++----------
 modules/rawmemchr |  1 -
 3 files changed, 27 insertions(+), 11 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index c7fa04a173..09f0577925 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,13 @@
+2023-11-12  Paul Eggert  <egg...@cs.ucla.edu>
+
+       rawmemchr: speed up, particularly on CHERI
+       * lib/rawmemchr.c (rawmemchr) [__CHERI__]: Use memchr instead of
+       one-byte reads.  This sped up a simple benchmark (rawmemchr on
+       100,000 bytes) by 6x on a Research Morello SoC r0p0 on CheriBSD 14.
+       [!__CHERI__]: Use sizeof, not alignof, as better alignment
+       should help performance a bit on some platforms.
+       * modules/rawmemchr (Depends-on): Remove alignasof.
+
 2023-11-12  Bruno Haible  <br...@clisp.org>
 
        jit/cache: New module.
diff --git a/lib/rawmemchr.c b/lib/rawmemchr.c
index 137d7282a6..9386ffc628 100644
--- a/lib/rawmemchr.c
+++ b/lib/rawmemchr.c
@@ -19,7 +19,7 @@
 /* Specification.  */
 #include <string.h>
 
-/* A function definition is only needed if HAVE_RAWMEMCHR is not defined.  */
+/* A function definition is needed only if HAVE_RAWMEMCHR is not defined.  */
 #if !HAVE_RAWMEMCHR
 
 # include <limits.h>
@@ -30,24 +30,30 @@
 void *
 rawmemchr (const void *s, int c_in)
 {
-#ifdef __CHERI__
-  /* Most architectures let you read an aligned word, even if the unsigned char
-     array at S ends in the middle of the word.  However, CHERI does not.  */
-  typedef unsigned char longword;
-#else
-  /* Change this typedef to experiment with performance.  */
+# ifdef __CHERI__
+  /* Most architectures let you read an aligned word,
+     even if the unsigned char array at S ends in the middle of the word.
+     However, CHERI does not, so call memchr
+     with the underlying object's remaining length.
+     This cannot return NULL if S points to a C_IN-terminated array.
+     Use builtins rather than including <cheri.h>, which is less stable.  */
+  return memchr (s, c_in, (__builtin_cheri_length_get (s)
+                           - __builtin_cheri_offset_get (s)));
+# else
+
+  /* You can change this typedef to experiment with performance.  */
   typedef uintptr_t longword;
   /* Verify that the longword type lacks padding bits.  */
   static_assert (UINTPTR_WIDTH == UCHAR_WIDTH * sizeof (uintptr_t));
-#endif
 
   const unsigned char *char_ptr;
   unsigned char c = c_in;
 
   /* Handle the first few bytes by reading one byte at a time.
-     Do this until CHAR_PTR is aligned on a longword boundary.  */
+     Do this until CHAR_PTR is aligned on a natural longword boundary,
+     as using alignof (longword) might be slower.  */
   for (char_ptr = (const unsigned char *) s;
-       (uintptr_t) char_ptr % alignof (longword) != 0;
+       (uintptr_t) char_ptr % sizeof (longword) != 0;
        ++char_ptr)
     if (*char_ptr == c)
       return (void *) char_ptr;
@@ -123,6 +129,7 @@ rawmemchr (const void *s, int c_in)
   while (*char_ptr != c)
     char_ptr++;
   return (void *) char_ptr;
+# endif
 }
 
 #endif
diff --git a/modules/rawmemchr b/modules/rawmemchr
index 5f3eeaf2d0..aba5088651 100644
--- a/modules/rawmemchr
+++ b/modules/rawmemchr
@@ -7,7 +7,6 @@ lib/rawmemchr.valgrind
 m4/rawmemchr.m4
 
 Depends-on:
-alignasof
 assert-h
 extensions
 stdint
-- 
2.40.1


Reply via email to